includes:
  in_header: preamble.tex
  
library(ggplot2)
library(plm)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(writexl)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(stringr)
library(broom)
library(tidyr)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(PerformanceAnalytics)
## Loading required package: xts
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(Metrics)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(glue)

EFPIA/ Europe

EFPIA R&D spending by country by year in euros

From yearly R&D reports 2004: Link Removed. Screenshot can be found in original data folder 2005: https://www.efpia.eu/media/15485/the-pharmaceutical-industry-in-figures-edition-2007.pdf 2006: https://www.efpia.eu/media/15486/the-pharmaceutical-industry-in-figures-edition-2008.pdf 2007: https://www.pharmine.eu/wp-content/uploads/2014/04/The_Pharma_Industry_in_figures_Edition_2009-20080612-009-EN-v1.pdf 2008: https://www.sfee.gr/wp-content/uploads/2015/05/Figures_2010_Final-20100611-001-EN-v1_0.pdf 2009: https://www.efpia.eu/media/15489/the-pharmaceutical-industry-in-figures-edition-2011.pdf 2010: https://www.sfee.gr/wp-content/uploads/2015/05/EFPIA_Figures_2012_Final.pdf Data for 2011 could not be found 2012: https://www.phar-in.eu/wp-content/uploads/2014/05/Figures_2014_Final.pdf 2013: https://www.efpia.eu/media/25822/2015-the-pharmaceutical-industry-in-figures.pdf 2014: https://www.efpia.eu/media/25055/the-pharmaceutical-industry-in-figures-june-2016.pdf 2015: https://www.efpia.eu/media/219735/efpia-pharmafigures2017_statisticbroch_v04-final.pdf 2016: https://www.efpia.eu/media/361960/efpia-pharmafigures2018_v07-hq.pdf 2017: https://www.efpia.eu/media/412931/the-pharmaceutical-industry-in-figures-2019.pdf 2018: https://www.efpia.eu/media/554521/efpia_pharmafigures_2020_web.pdf 2019: https://www.efpia.eu/media/602709/the-pharmaceutical-industry-in-figures-2021.pdf 2020: https://www.efpia.eu/media/637143/the-pharmaceutical-industry-in-figures-2022.pdf 2021: https://www.efpia.eu/media/rm4kzdlx/the-pharmaceutical-industry-in-figures-2023.pdf

efpia_rd <- read.csv("Cleaned Data/Area Level R&D Spending/EFPIA R&D Spending by Country in EU.csv")
efpia_rd <- clean_names(efpia_rd)
efpia_rd

formatting

efpia_rd$r_d_spending_millions_euros <- gsub(",", "", efpia_rd$r_d_spending_millions_euros)
efpia_rd$r_d_spending_millions_euros <- as.numeric(efpia_rd$r_d_spending_millions_euros)
## Warning: NAs introduced by coercion
efpia_rd

sum total EU by year

efpia_rd_totals <- efpia_rd %>%
  group_by(year) %>%
  summarise(total_mil_euros = sum(r_d_spending_millions_euros, na.rm = TRUE))
efpia_rd_totals

Imputing 2011 with average between 2011 and 2012 efpia_rd_totals

rd_2012 <- efpia_rd_totals[efpia_rd_totals$year == 2012, ]$total_mil_euros
rd_2010 <- efpia_rd_totals[efpia_rd_totals$year == 2010, ]$total_mil_euros
avg_2010_2012 <- (rd_2010 + rd_2012) / 2

efpia_rd_totals <- rbind(efpia_rd_totals, c(2011, avg_2010_2012))
efpia_rd_totals <- efpia_rd_totals[order(efpia_rd_totals$year),]
efpia_rd_totals

Dollars to euros https://www.macrotrends.net/2548/euro-dollar-exchange-rate-historical-chart

usdtoeuroavgconversion <- read_xlsx("Original Data/USD to Euro Conversion Avg by Year.xlsx")
usdtoeuroavgconversion
eurotodollars <- as.data.frame(cbind(usdtoeuroavgconversion$Year, 1/usdtoeuroavgconversion$`Average Closing Price`))
colnames(eurotodollars) <- c("year", "euros_to_dollar_conversion")
eurotodollars

multiply conversion rate to get r&d spending in dollars

rd_conversion_merged_df <- merge(efpia_rd_totals, eurotodollars, by = "year")
rd_conversion_merged_df
efpia_rd_totals$total_rd_mil_dollars <- rd_conversion_merged_df$euros_to_dollar_conversion * rd_conversion_merged_df$total_mil_euros
efpia_rd_totals$annual_domestic_RD_spending_bil_dollars <- efpia_rd_totals$total_rd_mil_dollars /1000

efpia_rd_totals

EU orphan drug market authorizations

EU Orphan Drugs were scraped from a pdf https://www.orpha.net/pdfs/orphacom/cahiers/docs/GB/list_of_orphan_drugs_in_europe.pdf it can be found in the the Original Data Folder

eu_orphan_drugs <- read_xlsx("Cleaned Data/Area Level Orphan Authorizations/EU_Orphan_w_Company from https___www.orpha.net_orphacom_cahiers_docs_GB_list_of_orphan_drugs_in_europe.pdf.xlsx")
eu_orphan_drugs <- clean_names(eu_orphan_drugs)
eu_orphan_drugs

Check for duplicated rows

eu_orphan_drugs[duplicated(eu_orphan_drugs),]

EU number of market authorizations for each drug

eu_ma_freq <- eu_orphan_drugs %>%
  count(tradename) %>%
  arrange(desc(n))

colnames(eu_ma_freq) <- c("Generic Name", "Market Authorization Frequency")
eu_ma_freq$`Generic Name` <- tolower(eu_ma_freq$`Generic Name`)
eu_ma_freq

How many ODs with more than 1 authorization

nrow(eu_ma_freq[eu_ma_freq$`Market Authorization Frequency` > 1, ])
## [1] 3
stargazer(eu_ma_freq[1:3,], title= "EU Market Authorization Frequencies for Orphan Drugs with Over 1 Market Authorization", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/EU_OD_Multiple_MAs.htm")
## 
## <table style="text-align:center"><caption><strong>EU Market Authorization Frequencies for Orphan Drugs with Over 1 Market Authorization</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Generic Name</td><td>Market Authorization Frequency</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>carbaglu</td><td>2</td></tr>
## <tr><td>nexavar</td><td>2</td></tr>
## <tr><td>soliris</td><td>2</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>

Look at where there are multiple market authorizations for the same drug

eu_orphan_drugs[eu_orphan_drugs$tradename == "CARBAGLU",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "NEXAVAR",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "SOLIRIS",]

Select only new drugs in the EU

eu_orphan_drugs_new <- eu_orphan_drugs %>%
  group_by(tradename) %>%  
  slice_min(order_by = market_authorization_year, n = 1, with_ties = FALSE) %>%  # Select the entry with the earliest market authorization year
  ungroup() 

eu_orphan_drugs_new

format new EU orphan drugs by year

eu_new_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs_new$market_authorization_year))
colnames(eu_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
eu_new_od_ma_freqs
eu_new_od_ma_freqs$Year <- as.character(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs$Year <- as.numeric(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs <- eu_new_od_ma_freqs[order(-eu_new_od_ma_freqs$Year),] 

head(eu_new_od_ma_freqs)

Select all MAs in the EU

format EU market authorizations by year

eu_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs$market_authorization_year))
colnames(eu_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
eu_od_ma_freqs
eu_od_ma_freqs$Year <- as.character(eu_od_ma_freqs$Year)
eu_od_ma_freqs$Year <- as.numeric(eu_od_ma_freqs$Year)
eu_od_ma_freqs <- eu_od_ma_freqs[order(-eu_od_ma_freqs$Year),] 
head(eu_od_ma_freqs)
eu_od_ma_freqs <- merge(eu_od_ma_freqs, eu_new_od_ma_freqs, by = "Year")
eu_od_ma_freqs

Graph of EU Market Authorizations orphan drugs

ggplot(eu_od_ma_freqs, aes(x = Year)) +
  geom_line(aes(y = `Freq Orphan Drug MAs`, color="Orphan Drug MAs")) +
  geom_line(aes(y = `Freq New Orphan Drug MAs`, color="New Orphan Drug MAs")) +
  labs(x = "Year",
       y = str_wrap("Annual Number of Market Authorizations in the EU", width=40),
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU", width=50)
       ) +
  scale_color_manual(name = "Color", values = c("darkred", "dodgerblue"))+
  theme_minimal()

merge orphan drug and R&D spending tables

colnames(efpia_rd_totals) <- c("Year", "total_rd_euros", "total_rd_mil_dollars" , "total_rd_bil_dollars")
eu_rd_mas <- merge(efpia_rd_totals,eu_od_ma_freqs,by="Year")
eu_rd_mas <- clean_names(eu_rd_mas)
eu_rd_mas

PhRMA/ United States

PhRMA R&D spending by year

https://phrma.org/-/media/Project/PhRMA/PhRMA-Org/PhRMA-Refresh/Report-PDFs/P-R/PhRMA_membership-survey_2022_final.pdf

phrma_us_rd <- read.csv("Cleaned Data/Area Level R&D Spending/US Domestic R&D and R&D Abroad,* PhRMA Member Companies- 1983–2021 (dollar figures in millions) .csv")
head(phrma_us_rd)

US orphan drug Market Authorizations

https://www.accessdata.fda.gov/scripts/opdlisting/oopd/ Only approved products

us_orphan_drugs <- read.csv("Original Data/Area Level Orphan Authorizations/US_FDA_OD.csv")

us_orphan_drugs <- clean_names(us_orphan_drugs)
us_orphan_drugs <- us_orphan_drugs[!is.na(us_orphan_drugs$marketing_approval_date), ] 

us_orphan_drugs$marketing_approval_date <- as.Date(us_orphan_drugs$marketing_approval_date, format="%m/%d/%y")

us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),]

Check for duplicated rows Remove one row because duplicated

us_orphan_drugs[duplicated(us_orphan_drugs),]
us_orphan_drugs[us_orphan_drugs$generic_name == "tacrolimus" ,]
us_orphan_drugs <- us_orphan_drugs[-1119, ]
us_orphan_drugs[duplicated(us_orphan_drugs),]

Number of designations per product

us_ma_freq <- us_orphan_drugs %>%
  count(generic_name) %>%
  arrange(desc(n))
colnames(us_ma_freq) <- c("Generic Name", "Market Authorization Frequency")

us_ma_freq

How many ODs with more than 1 authorization

nrow(us_ma_freq[us_ma_freq$`Market Authorization Frequency` > 1, ])
## [1] 223
stargazer(us_ma_freq[1:19,], title= "US Market Authorization Frequencies for Orphan Drugs with Over 5 Market Authorizations", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/US_OD_Multiple_MAs.htm")
## 
## <table style="text-align:center"><caption><strong>US Market Authorization Frequencies for Orphan Drugs with Over 5 Market Authorizations</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Generic Name</td><td>Market Authorization Frequency</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>nivolumab</td><td>16</td></tr>
## <tr><td>pembrolizumab</td><td>16</td></tr>
## <tr><td>bevacizumab</td><td>11</td></tr>
## <tr><td>ibrutinib</td><td>11</td></tr>
## <tr><td>ivacaftor</td><td>11</td></tr>
## <tr><td>olaparib</td><td>11</td></tr>
## <tr><td>lenalidomide</td><td>9</td></tr>
## <tr><td>lisocabtagene maraleucel</td><td>9</td></tr>
## <tr><td>adalimumab</td><td>8</td></tr>
## <tr><td>axicabtagene ciloleucel</td><td>7</td></tr>
## <tr><td>brentuximab vedotin</td><td>7</td></tr>
## <tr><td>crizotinib</td><td>7</td></tr>
## <tr><td>daratumumab</td><td>7</td></tr>
## <tr><td>ravulizumab-cwvz</td><td>7</td></tr>
## <tr><td>zanubrutinib</td><td>7</td></tr>
## <tr><td>canakinumab</td><td>6</td></tr>
## <tr><td>isavuconazonium sulfate</td><td>6</td></tr>
## <tr><td>rituximab</td><td>6</td></tr>
## <tr><td>selpercatinib</td><td>6</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>

Select only new drugs in the US

us_orphan_drugs_new <- us_orphan_drugs %>%
  group_by(generic_name) %>%  
  slice_min(order_by = marketing_approval_date, n = 1, with_ties = FALSE) %>%  # Select the entry with the earliest market authorization year
  ungroup() 

us_orphan_drugs_new
us_orphan_drugs_new$Marketing.year <- year(as.Date(us_orphan_drugs_new$marketing_approval_date)) 
us_new_od_ma_freqs <- as.data.frame(table(us_orphan_drugs_new$Marketing.year))
colnames(us_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
us_new_od_ma_freqs

Make sure matches the “Number of Orphan Drug Approvals: 1262” from https://www.accessdata.fda.gov/scripts/opdlisting/oopd/listResult.cfm on 9/29/2024

nrow(us_orphan_drugs[year(as.Date(us_orphan_drugs$marketing_approval_date)), ])
## [1] 1262

Select all MAs in the US format US MA date as year

us_orphan_drugs$Marketing.year <- year(as.Date(us_orphan_drugs$marketing_approval_date)) 
us_od_ma_freqs <- as.data.frame(table(us_orphan_drugs$Marketing.year))
colnames(us_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")

us_od_ma_freqs$Year <- as.character(us_od_ma_freqs$Year)
us_od_ma_freqs$Year <- as.numeric(us_od_ma_freqs$Year)
us_od_ma_freqs <- us_od_ma_freqs[order(-us_od_ma_freqs$Year),] 

us_od_ma_freqs <- merge(us_od_ma_freqs, us_new_od_ma_freqs, by = "Year")

us_od_ma_freqs

Graph of US Market Authorizations orphan drugs

ggplot(us_od_ma_freqs, aes(x = Year)) +
  geom_line(aes(y = `Freq Orphan Drug MAs`, color="Orphan Drug MAs")) +
  geom_line(aes(y = `Freq New Orphan Drug MAs`, color="New Orphan Drug MAs")) +
  labs(x = "Year",
       y = str_wrap("Annual Number of Market Authorizations in the US", width=40),
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the US", width=50), 
       ) +
  scale_color_manual(name = "Color", values = c("darkred", "dodgerblue"))+
  theme_minimal()

us_orphan_drugs

merge US orphan drug df with PhRMA r&d df

phrma_us_rd <- phrma_us_rd[-dim(phrma_us_rd)[1],]
us_rd_mas <- merge(phrma_us_rd,us_od_ma_freqs,by="Year")
us_rd_mas

format

us_rd_mas$Domestic.R.D. <- parse_number(us_rd_mas$Domestic.R.D.) #dollars are in millions
us_rd_mas <- clean_names(us_rd_mas)
us_rd_mas

us dataframe simplified

us_rd_mas$annual_domestic_RD_spending_bil_dollars <- us_rd_mas$domestic_r_d /1000 #convert from millions of dollars to billions
us_rd_mas_simplified <- as.data.frame(cbind(us_rd_mas$year, us_rd_mas$annual_domestic_RD_spending_bil_dollars, us_rd_mas$freq_orphan_drug_m_as, us_rd_mas$freq_new_orphan_drug_m_as))
colnames(us_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
us_rd_mas_simplified$area <- rep("US", dim(us_rd_mas_simplified)[1])
us_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(us_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars)
us_rd_mas_simplified

eu dataframe simplified

eu_rd_mas_simplified <- as.data.frame(cbind(eu_rd_mas$year, eu_rd_mas$total_rd_bil_dollars, eu_rd_mas$freq_orphan_drug_m_as, eu_rd_mas$freq_new_orphan_drug_m_as))
colnames(eu_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
eu_rd_mas_simplified$area <- rep("EU", dim(eu_rd_mas_simplified)[1])
eu_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(eu_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars)
eu_rd_mas_simplified

EU GDP Per Capita In Dollars

https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-per-capita

eu_gdp_percapita_annual <- read.csv("Original Data/GDP/european-union-gdp-per-capita-annual.csv", skip=16)
eu_gdp_percapita_annual$year <- year(as.Date(eu_gdp_percapita_annual$date))
eu_gdp_percapita_annual <- clean_names(eu_gdp_percapita_annual)
colnames(eu_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(eu_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
eu_gdp_percapita_annual

Merge GDP Per Capita, GDP Growth rate into same df

eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
eu_rd_mas_simplified

US GDP Per Capita In Dollars

https://www.macrotrends.net/global-metrics/countries/USA/united-states/gdp-per-capita

us_gdp_percapita_annual <- read.csv("Original Data/GDP/united-states-gdp-per-capita-annual.csv", skip=16)
us_gdp_percapita_annual$year <- year(as.Date(us_gdp_percapita_annual$date))

us_gdp_percapita_annual <- clean_names(us_gdp_percapita_annual)

colnames(us_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(us_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"

us_gdp_percapita_annual

Merge GDP Per Capita, GDP Growth rate into same df

us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified

Limit dataframe to 2004 and later, make lagged R&D spending NA at year 2004

combined_rd_ma_df <- rbind(eu_rd_mas_simplified, us_rd_mas_simplified)
combined_rd_ma_df <- combined_rd_ma_df %>%
  filter(year > 2003)
combined_rd_ma_df[combined_rd_ma_df$area =="US" & combined_rd_ma_df$year==2004,]$lagged_annual_domestic_RD_spending_bil_dollars <-  NA
combined_rd_ma_df
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
  geom_point() +
  labs(x = "Annual Domestic RD Spending (Billions USD)",
       y = "Number of Orphan Drug Market Authorizations",
       color = "Area",
       title=str_wrap("Orphan Drug Market Authorizations by Annual Domestic R&D Spending in the EU and US", 60)) +
  scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year , y = freq_orphan_drug_mas, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = str_wrap("Annual Number of Orphan Drug Market Authorizations", width=30),
       color = "Area",
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU and US", width=40)) +
    theme_minimal() +
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year , y = freq_new_orphan_drug_mas, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = str_wrap("Annual Number of Market Authorizations for New Orphan Drugs", width=40),
       color = "Area",
       title=str_wrap("Annual Number of Market Authorizations for New Orphan Drugs in the EU and US", width=40)) +
    theme_minimal() +
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) 

ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "Annual Domestic R&D Spending (Billions USD)",
       color = "Area",
       title="Annual Domestic R&D Spending in the EU and US") +
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_per_capita_annual_growthrate_usd, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "GDP Per Capita on last day of Year (USD)",
       color = "Area",
       title="GDP Per Capita on last day of Year in the EU and US") +
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

Orphan Drug Frequencies by Year by Area

hist(subset(combined_rd_ma_df, area=="US")$freq_orphan_drug_mas, main="US Frequencies of Annual Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_orphan_drug_mas, main="EU Frequencies of Annual Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

New Orphan Drugs Frequencies by Year by Area

hist(subset(combined_rd_ma_df, area=="US")$freq_new_orphan_drug_mas, main="US Frequencies of Annual New Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_new_orphan_drug_mas, main="EU Frequencies of Annual New Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

Making a years after 2004 variable

combined_rd_ma_df$yearsafter2004 <- combined_rd_ma_df$year - 2004
combined_rd_ma_df

Summmaries

R&D spending growth rates by year

rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$annual_domestic_RD_spending_bil_dollars 
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$lagged_annual_domestic_RD_spending_bil_dollars 
EU_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
EU_annual_domestic_RD_spending_bil_dollars_growth_rate
##  [1]        NA 1.0296598 1.1212038 0.9661782 0.9511425 1.0932108 1.0586718
##  [8] 0.9953716 1.1192369 0.9830681 1.0146180 1.3017754 1.0116816 1.0219124
## [15] 0.9845789 1.0954102 1.0319511 1.0361913
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$annual_domestic_RD_spending_bil_dollars 
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$lagged_annual_domestic_RD_spending_bil_dollars 
US_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
US_annual_domestic_RD_spending_bil_dollars_growth_rate
##  [1]        NA 1.0478253 1.0968355 1.0777352 0.9716650 0.9939530 1.1508117
##  [8] 0.8939616 1.0312479 1.0769337 1.0084489 1.1809938 1.0895376 1.0636573
## [15] 1.1159483 1.0343509 1.1251628 1.0994019
rdspending_growth_rates <- data.frame("Year"=seq(2004,2021), "US"=round(US_annual_domestic_RD_spending_bil_dollars_growth_rate,2), "EU"=round(EU_annual_domestic_RD_spending_bil_dollars_growth_rate,2))

rdspending_growth_rates$US <- as.character(rdspending_growth_rates$US )
rdspending_growth_rates$EU <- as.character(rdspending_growth_rates$EU )

# make 2011 NA because 2011 values were imputed 
rdspending_growth_rates[rdspending_growth_rates$Year == 2011,]$EU <- "NA"
rdspending_growth_rates[rdspending_growth_rates$Year == 2012,]$EU <- "NA"

rdspending_growth_rates
stargazer(rdspending_growth_rates,  out="Final Figures Formatted/RDSpendingAnnualGrowthRates.htm" , summary=FALSE, rownames=FALSE, colnames = TRUE, digits=2, digit.separator="",column.sep.width= "15pt", title="Annual Growth Rate of Phramaceutical R&D Spending")
## 
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Wed, Oct 30, 2024 - 14:06:11
## \begin{table}[!htbp] \centering 
##   \caption{Annual Growth Rate of Phramaceutical R&D Spending} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{15pt}} ccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
## Year & US & EU \\ 
## \hline \\[-1.8ex] 
## $2004$ &  &  \\ 
## $2005$ & 1.05 & 1.03 \\ 
## $2006$ & 1.1 & 1.12 \\ 
## $2007$ & 1.08 & 0.97 \\ 
## $2008$ & 0.97 & 0.95 \\ 
## $2009$ & 0.99 & 1.09 \\ 
## $2010$ & 1.15 & 1.06 \\ 
## $2011$ & 0.89 & NA \\ 
## $2012$ & 1.03 & NA \\ 
## $2013$ & 1.08 & 0.98 \\ 
## $2014$ & 1.01 & 1.01 \\ 
## $2015$ & 1.18 & 1.3 \\ 
## $2016$ & 1.09 & 1.01 \\ 
## $2017$ & 1.06 & 1.02 \\ 
## $2018$ & 1.12 & 0.98 \\ 
## $2019$ & 1.03 & 1.1 \\ 
## $2020$ & 1.13 & 1.03 \\ 
## $2021$ & 1.1 & 1.04 \\ 
## \hline \\[-1.8ex] 
## \end{tabular} 
## \end{table}

Linear Models of Annual Domestic R&D Spending

Plot Linear Models of Annual Domestic R&D Spending

plot_linear_rd <- function(linear_model, model_number){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  combined_rd_ma_df$residuals <- resid(linear_model)
  par(mfrow =c(2,4))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  mtext(glue('Linear Model {model_number} of Annual Domestic R&D Spending - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  predicted <- predict(linear_model , type = "response")

  ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = "Annual Domestic R&D Spending (Billions USD)",
         color = "Area",
         alpha = "Line Type",
         title=glue('Linear Model {model_number} of Annual Domestic R&D Spending'))+
      scale_color_manual(name = "Area", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1))+
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  #Ariel Muldoon
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  # Teun van den Brand
  # 2024/02/26

}
get_rd_model_metrics <- function(model, model_type){
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
  model_rmse <- rmse(actual=actual, predicted=predicted)

  model_df1 <- summary(model)$fstatistic[2]
  model_df2 <- summary(model)$fstatistic[3]
  model_fstat <- summary(model)$fstatistic[1]
  model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE) 
  # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
  # Renesh Bedre
  # March 26, 2023
    
  model_adjRsq <- summary(model)$adj.r.squared
  model_aic <- AIC(model)
  
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq)
  rownames(metrics_df) <- NULL

  return(metrics_df)
}

Linear Model 1 of Annual Domestic R&D Spending

mod.rd.1 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) , data=combined_rd_ma_df)
summary(mod.rd.1)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area), 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.701  -7.570  -4.119   6.193  33.354 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         24.970      2.712   9.208 9.23e-11 ***
## as.factor(area)US   21.286      3.835   5.551 3.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.5 on 34 degrees of freedom
## Multiple R-squared:  0.4754, Adjusted R-squared:   0.46 
## F-statistic: 30.81 on 1 and 34 DF,  p-value: 3.305e-06
plot_linear_rd(mod.rd.1, "1")

#Estimate covariance matrix with Newey West
mod.rd.1.vcov <- vcovPL(mod.rd.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.1 <- coeftest(mod.rd.1, vcov = mod.rd.1.vcov) 
mod.rd.nw.1
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)        24.9701     2.4554 10.1695 7.571e-12 ***
## as.factor(area)US  21.2863     3.1408  6.7774 8.581e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.1)
##                      2.5 %   97.5 %
## (Intercept)       19.98015 29.96002
## as.factor(area)US 14.90350 27.66917
#Get Metrics
mod.rd.1.metrics <- get_rd_model_metrics(mod.rd.1, "Linear")

Linear Model 2 of Annual Domestic R&D Spending

mod.rd.2 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.2)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3415 -3.6925 -0.8054  1.4060 17.3602 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         8.9767     2.0536   4.371 0.000116 ***
## as.factor(area)US  21.2863     1.8977  11.217 8.46e-13 ***
## yearsafter2004      1.8816     0.1829  10.288 7.91e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.693 on 33 degrees of freedom
## Multiple R-squared:  0.8753, Adjusted R-squared:  0.8678 
## F-statistic: 115.8 on 2 and 33 DF,  p-value: 1.205e-15
plot_linear_rd(mod.rd.2, "2")

#Estimate covariance matrix with Newey West
mod.rd.2.vcov <- vcovPL(mod.rd.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.2 <- coeftest(mod.rd.2, vcov = mod.rd.2.vcov) 
mod.rd.nw.2
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)        8.97674    3.27862   2.738  0.009885 ** 
## as.factor(area)US 21.28634    3.18801   6.677 1.335e-07 ***
## yearsafter2004     1.88157    0.22857   8.232 1.662e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.2)
##                       2.5 %    97.5 %
## (Intercept)        2.306343 15.647146
## as.factor(area)US 14.800276 27.772399
## yearsafter2004     1.416543  2.346596
#Get Metrics
mod.rd.2.metrics <- get_rd_model_metrics(mod.rd.2, "Linear")

Linear Model 3 of Annual Domestic R&D Spending

mod.rd.3 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.3)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     gdp_per_capita_annual_growthrate_usd + yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4165 -3.1174 -0.8098  2.6095 15.7651 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            8.1200     2.1801   3.725 0.000754 ***
## as.factor(area)US                     21.2716     1.8896  11.257 1.16e-12 ***
## gdp_per_capita_annual_growthrate_usd   0.1831     0.1616   1.133 0.265549    
## yearsafter2004                         1.9132     0.1842  10.385 8.93e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.669 on 32 degrees of freedom
## Multiple R-squared:  0.8801, Adjusted R-squared:  0.8689 
## F-statistic: 78.31 on 3 and 32 DF,  p-value: 7.91e-15
plot_linear_rd(mod.rd.3, "3")

#Estimate covariance matrix with Newey West
mod.rd.3.vcov <- vcovPL(mod.rd.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.3 <- coeftest(mod.rd.3, vcov = mod.rd.3.vcov)
mod.rd.nw.3
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                           8.12002    2.91992  2.7809  0.009008 ** 
## as.factor(area)US                    21.27158    3.22110  6.6038 1.913e-07 ***
## gdp_per_capita_annual_growthrate_usd  0.18314    0.10027  1.8266  0.077107 .  
## yearsafter2004                        1.91322    0.20108  9.5147 7.547e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.3)
##                                            2.5 %     97.5 %
## (Intercept)                           2.17234330 14.0676976
## as.factor(area)US                    14.71041811 27.8327508
## gdp_per_capita_annual_growthrate_usd -0.02109303  0.3873803
## yearsafter2004                        1.50363514  2.3228064
#Get Metrics
mod.rd.3.metrics <- get_rd_model_metrics(mod.rd.3, "Linear")

VIFs of preditors

vif(mod.rd.3)
##                      as.factor(area) gdp_per_capita_annual_growthrate_usd 
##                             1.000047                             1.023572 
##                       yearsafter2004 
##                             1.023525

Linear Model 4 of Annual Domestic R&D Spending

mod.rd.4 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.4)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004 + as.factor(area):yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3946 -2.1954  0.9159  1.8342 11.3927 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       14.9442     1.9458   7.680 9.38e-09 ***
## as.factor(area)US                  9.3514     2.7518   3.398  0.00183 ** 
## yearsafter2004                     1.1795     0.1954   6.036 9.77e-07 ***
## as.factor(area)US:yearsafter2004   1.4041     0.2763   5.081 1.57e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.301 on 32 degrees of freedom
## Multiple R-squared:  0.931,  Adjusted R-squared:  0.9245 
## F-statistic: 143.9 on 3 and 32 DF,  p-value: < 2.2e-16
plot_linear_rd(mod.rd.4, "4")

#Estimate covariance matrix with Newey West
mod.rd.4.vcov <- vcovPL(mod.rd.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.4 <- coeftest(mod.rd.4, vcov = mod.rd.4.vcov) 
mod.rd.nw.4
## 
## t test of coefficients:
## 
##                                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                      14.944238   1.138132 13.1305 1.972e-14 ***
## as.factor(area)US                 9.351351   2.470300  3.7855 0.0006372 ***
## yearsafter2004                    1.179511   0.095676 12.3282 1.073e-13 ***
## as.factor(area)US:yearsafter2004  1.404116   0.321119  4.3726 0.0001217 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.4)
##                                       2.5 %    97.5 %
## (Intercept)                      12.6259391 17.262536
## as.factor(area)US                 4.3195141 14.383188
## yearsafter2004                    0.9846264  1.374396
## as.factor(area)US:yearsafter2004  0.7500184  2.058214
#Get Metrics
mod.rd.4.metrics <- get_rd_model_metrics(mod.rd.4, "Linear")

Linear Model 5 of Annual Domestic R&D Spending

mod.rd.5 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area), data=combined_rd_ma_df)
summary(mod.rd.5)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area), 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4057 -2.2988  0.3359  1.5807 10.7972 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          14.42340    2.11349   6.824 1.20e-07 ***
## as.factor(area)US                     9.60610    2.80244   3.428  0.00174 ** 
## gdp_per_capita_annual_growthrate_usd  0.08339    0.12540   0.665  0.51094    
## yearsafter2004                        1.20930    0.20215   5.982 1.29e-06 ***
## as.factor(area)US:yearsafter2004      1.37335    0.28259   4.860 3.21e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.339 on 31 degrees of freedom
## Multiple R-squared:  0.932,  Adjusted R-squared:  0.9232 
## F-statistic: 106.2 on 4 and 31 DF,  p-value: < 2.2e-16
plot_linear_rd(mod.rd.5, "5")

#Estimate covariance matrix with Newey West
mod.rd.5.vcov <- vcovPL(mod.rd.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.5 <- coeftest(mod.rd.5, vcov = mod.rd.5.vcov)
mod.rd.nw.5
## 
## t test of coefficients:
## 
##                                       Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          14.423396   0.895858 16.1001 < 2.2e-16 ***
## as.factor(area)US                     9.606103   2.668712  3.5995 0.0010962 ** 
## gdp_per_capita_annual_growthrate_usd  0.083394   0.078564  1.0615 0.2966708    
## yearsafter2004                        1.209304   0.073613 16.4280 < 2.2e-16 ***
## as.factor(area)US:yearsafter2004      1.373355   0.330965  4.1496 0.0002407 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.5)
##                                            2.5 %     97.5 %
## (Intercept)                          12.59628170 16.2505110
## as.factor(area)US                     4.16322798 15.0489771
## gdp_per_capita_annual_growthrate_usd -0.07683769  0.2436255
## yearsafter2004                        1.05917056  1.3594380
## as.factor(area)US:yearsafter2004      0.69834823  2.0483617
#Get RMSE
predicted <- predict(mod.rd.5, type = "response")
actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
mod.rd.5.rmse <- rmse(actual=actual, predicted=predicted)

#Get Metrics
mod.rd.5.metrics <- get_rd_model_metrics(mod.rd.5, "Linear")

Linear Model 6 of Annual Domestic R&D Spending

mod.rd.6 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area) +  +I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area), data=combined_rd_ma_df)
summary(mod.rd.6)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area) + 
##     +I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area), 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2500 -0.9883 -0.3515  1.2539  5.1768 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           18.14473    1.63419  11.103 5.84e-12 ***
## as.factor(area)US                     15.85117    1.99412   7.949 9.12e-09 ***
## gdp_per_capita_annual_growthrate_usd  -0.08329    0.06975  -1.194   0.2421    
## yearsafter2004                         0.14465    0.42207   0.343   0.7343    
## I(yearsafter2004^2)                    0.05912    0.02348   2.518   0.0176 *  
## as.factor(area)US:yearsafter2004      -1.09801    0.54375  -2.019   0.0528 .  
## as.factor(area)US:I(yearsafter2004^2)  0.14899    0.03057   4.874 3.60e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.142 on 29 degrees of freedom
## Multiple R-squared:  0.9845, Adjusted R-squared:  0.9813 
## F-statistic: 306.7 on 6 and 29 DF,  p-value: < 2.2e-16
plot_linear_rd(mod.rd.6, "6")

#Estimate covariance matrix with Newey West
mod.rd.6.vcov <- vcovPL(mod.rd.6, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.6 <- coeftest(mod.rd.6, vcov = mod.rd.6.vcov)
mod.rd.nw.6
## 
## t test of coefficients:
## 
##                                        Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                           18.144732   0.916141 19.8056 < 2.2e-16
## as.factor(area)US                     15.851175   2.428745  6.5265 3.799e-07
## gdp_per_capita_annual_growthrate_usd  -0.083292   0.052153 -1.5971 0.1210926
## yearsafter2004                         0.144646   0.283582  0.5101 0.6138653
## I(yearsafter2004^2)                    0.059124   0.015743  3.7555 0.0007736
## as.factor(area)US:yearsafter2004      -1.098009   0.618239 -1.7760 0.0862235
## as.factor(area)US:I(yearsafter2004^2)  0.148991   0.034491  4.3197 0.0001669
##                                          
## (Intercept)                           ***
## as.factor(area)US                     ***
## gdp_per_capita_annual_growthrate_usd     
## yearsafter2004                           
## I(yearsafter2004^2)                   ***
## as.factor(area)US:yearsafter2004      .  
## as.factor(area)US:I(yearsafter2004^2) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.6)
##                                             2.5 %      97.5 %
## (Intercept)                           16.27101332 20.01845100
## as.factor(area)US                     10.88383349 20.81851583
## gdp_per_capita_annual_growthrate_usd  -0.18995673  0.02337328
## yearsafter2004                        -0.43534481  0.72463632
## I(yearsafter2004^2)                    0.02692573  0.09132242
## as.factor(area)US:yearsafter2004      -2.36245040  0.16643231
## as.factor(area)US:I(yearsafter2004^2)  0.07844841  0.21953376
#Get Metrics
mod.rd.6.metrics <- get_rd_model_metrics(mod.rd.6, "Linear")
mod.rd.metrics <- rbind(mod.rd.1.metrics, mod.rd.2.metrics, mod.rd.3.metrics, mod.rd.4.metrics, mod.rd.5.metrics, mod.rd.6.metrics)

aic_values <- round(mod.rd.metrics$aic)
rmse_values <- round(mod.rd.metrics$rmse, 2)
fstat_values <- round(mod.rd.metrics$fstat, 2)
p_values <- round(mod.rd.metrics$pval, 4)
adjrsq_values <- round(mod.rd.metrics$adjrsq, 2)

stargazer(mod.rd.1, mod.rd.2, mod.rd.3, mod.rd.4, mod.rd.5, mod.rd.6, 
          title="Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/rdmodelsNonAdjusted.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)", "Years After 2004 (β<sup>̂</sup><sub>3</sub>)", "Years After 2004^{2} (β<sup>̂</sup><sub>4</sub>)", "US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)", "US:Years After 2004^{2} (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>21.286<sup>***</sup></td><td>21.286<sup>***</sup></td><td>21.272<sup>***</sup></td><td>9.351<sup>***</sup></td><td>9.606<sup>***</sup></td><td>15.851<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.835)</td><td>(1.898)</td><td>(1.890)</td><td>(2.752)</td><td>(2.802)</td><td>(1.994)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)</td><td></td><td></td><td>0.183</td><td></td><td>0.083</td><td>-0.083</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.162)</td><td></td><td>(0.125)</td><td>(0.070)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>1.882<sup>***</sup></td><td>1.913<sup>***</sup></td><td>1.180<sup>***</sup></td><td>1.209<sup>***</sup></td><td>0.145</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.183)</td><td>(0.184)</td><td>(0.195)</td><td>(0.202)</td><td>(0.422)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>4</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.059<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.023)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td></td><td>1.404<sup>***</sup></td><td>1.373<sup>***</sup></td><td>-1.098<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.276)</td><td>(0.283)</td><td>(0.544)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.149<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.031)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>24.970<sup>***</sup></td><td>8.977<sup>***</sup></td><td>8.120<sup>***</sup></td><td>14.944<sup>***</sup></td><td>14.423<sup>***</sup></td><td>18.145<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.712)</td><td>(2.054)</td><td>(2.180)</td><td>(1.946)</td><td>(2.113)</td><td>(1.634)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>282</td><td>232</td><td>233</td><td>213</td><td>214</td><td>165</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11.18</td><td>5.45</td><td>5.34</td><td>4.06</td><td>4.03</td><td>1.92</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.46</td><td>0.87</td><td>0.87</td><td>0.92</td><td>0.92</td><td>0.98</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>30.81</td><td>115.83</td><td>78.31</td><td>143.9</td><td>106.16</td><td>306.67</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.rd.nw.1 
m2 <- mod.rd.nw.2 
m3 <- mod.rd.nw.3
m4 <- mod.rd.nw.4
m5 <- mod.rd.nw.5 
m6 <- mod.rd.nw.6

stargazer(m1, m2, m3, m4, m5, m6, title="Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending", align=TRUE, type = "html", out="Final Models Formatted/rdmodels.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)", "Years After 2004 (β<sup>̂</sup><sub>3</sub>)", "Years After 2004^{2} (β<sup>̂</sup><sub>4</sub>)", "US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)", "US:Years After 2004^{2} (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>21.286<sup>***</sup></td><td>21.286<sup>***</sup></td><td>21.272<sup>***</sup></td><td>9.351<sup>***</sup></td><td>9.606<sup>***</sup></td><td>15.851<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.141)</td><td>(3.188)</td><td>(3.221)</td><td>(2.470)</td><td>(2.669)</td><td>(2.429)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)</td><td></td><td></td><td>0.183<sup>*</sup></td><td></td><td>0.083</td><td>-0.083</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.100)</td><td></td><td>(0.079)</td><td>(0.052)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>1.882<sup>***</sup></td><td>1.913<sup>***</sup></td><td>1.180<sup>***</sup></td><td>1.209<sup>***</sup></td><td>0.145</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.229)</td><td>(0.201)</td><td>(0.096)</td><td>(0.074)</td><td>(0.284)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>4</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.059<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td></td><td>1.404<sup>***</sup></td><td>1.373<sup>***</sup></td><td>-1.098<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.321)</td><td>(0.331)</td><td>(0.618)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.149<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.034)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>24.970<sup>***</sup></td><td>8.977<sup>***</sup></td><td>8.120<sup>***</sup></td><td>14.944<sup>***</sup></td><td>14.423<sup>***</sup></td><td>18.145<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.455)</td><td>(3.279)</td><td>(2.920)</td><td>(1.138)</td><td>(0.896)</td><td>(0.916)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>282</td><td>232</td><td>233</td><td>213</td><td>214</td><td>165</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11.18</td><td>5.45</td><td>5.34</td><td>4.06</td><td>4.03</td><td>1.92</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.46</td><td>0.87</td><td>0.87</td><td>0.92</td><td>0.92</td><td>0.98</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>30.81</td><td>115.83</td><td>78.31</td><td>143.9</td><td>106.16</td><td>306.67</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Linear Models of Number of Orphan Drug Market Authorizations

Plot Models of Number of Orphan Drug Market Authorizations (Linear or Poisson Models)

plot_lin_pois_od <- function(model, model_number, model_type){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  combined_rd_ma_df$residuals <- resid(model)
  par(mfrow =c(2,4))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  
  mtext(glue('{model_type} Model {model_number} of Annual Orphan Drug MAs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  predicted <- predict(model , type = "response")
  
  ggplot(combined_rd_ma_df, aes(x = year, y = freq_orphan_drug_mas, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = str_wrap("Number of Annual Orphan Drug Market Authorizations", width = 40),
         color = "Area",
         alpha = "Line Type",
         title=str_wrap(glue("{model_type} Model {model_number} of Annual Orphan Drug Market Authorizations"), width = 60))+
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1))+
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  #Teun van den Brand
  #2024/02/26

}

Get Metrics of Models of Number of Orphan Drug Market Authorizations (Linear or Poisson Models)

get_od_model_metrics <- function(model, model_type){
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$freq_orphan_drug_mas
  model_rmse <- rmse(actual=actual, predicted=predicted)
  

  model_df1 <- NA
  model_df2 <- NA
  model_adjRsq <- NA
  model_fstat <- NA
  model_pval <- NA
  model_loglik <- NA
  
  if(model_type == "Linear"){
    
    model_df1 <- summary(model)$fstatistic[2]
    model_df2 <- summary(model)$fstatistic[3]
    model_fstat <- summary(model)$fstatistic[1]
    model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE) 
    # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
    # Renesh Bedre
    # March 26, 2023
  
    model_adjRsq <- summary(model)$adj.r.squared
  }
  
  if(model_type == "Poisson"){
    model_loglik <- logLik(model)
  }
  
  model_aic <- AIC(model)
  
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
  rownames(metrics_df) <- NULL


  return(metrics_df)
}

Linear Model 1 of Num of Annual Orphan Drug MAs

mod.od.1 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004  , data=combined_rd_ma_df)
summary(mod.od.1)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.210 -11.451  -2.210   9.258  34.673 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -13.1974     5.5594  -2.374   0.0236 *  
## as.factor(area)US  32.9444     5.1374   6.413 2.88e-07 ***
## yearsafter2004      2.8271     0.4951   5.710 2.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.41 on 33 degrees of freedom
## Multiple R-squared:  0.6908, Adjusted R-squared:  0.6721 
## F-statistic: 36.86 on 2 and 33 DF,  p-value: 3.881e-09
plot_lin_pois_od(mod.od.1, "1", "Linear")

#Estimate covariance matrix with Newey West
mod.od.1.vcov <- vcovPL(mod.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.1 <-coeftest(mod.od.1, vcov = mod.od.1.vcov)
mod.od.nw.1
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       -13.19737    7.38616 -1.7868  0.083165 .  
## as.factor(area)US  32.94444    9.89037  3.3310  0.002141 ** 
## yearsafter2004      2.82714    0.37832  7.4728 1.369e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.1, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.1.rmse <- rmse(actual=actual, predicted=predicted)


confint(mod.od.nw.1)
##                        2.5 %    97.5 %
## (Intercept)       -28.224621  1.829884
## as.factor(area)US  12.822344 53.066545
## yearsafter2004      2.057436  3.596847
mod.od.1.metrics <- get_od_model_metrics(mod.od.1, "Linear")

Linear Model 2 of Num of Annual Orphan Drug MAs

mod.od.2 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004  + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.2)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.8357  -4.5679   0.6621   3.4228  23.2087 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             -32.2305     4.7330  -6.810 1.25e-07
## as.factor(area)US                        -9.8002     7.6308  -1.284    0.209
## yearsafter2004                           -0.9122     0.6983  -1.306    0.201
## annual_domestic_RD_spending_bil_dollars   2.0073     0.3205   6.262 5.83e-07
## gdp_per_capita_annual_growthrate_usd      0.2169     0.2989   0.726    0.474
##                                            
## (Intercept)                             ***
## as.factor(area)US                          
## yearsafter2004                             
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.28 on 31 degrees of freedom
## Multiple R-squared:  0.8708, Adjusted R-squared:  0.8541 
## F-statistic: 52.24 on 4 and 31 DF,  p-value: 2.429e-13
plot_lin_pois_od(mod.od.2, "2", "Linear")

#Estimate covariance matrix with Newey West
mod.od.2.vcov <- vcovPL(mod.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.2 <-coeftest(mod.od.2, vcov = mod.od.2.vcov)
mod.od.nw.2
## 
## t test of coefficients:
## 
##                                          Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                             -32.23048    5.15161 -6.2564 5.928e-07
## as.factor(area)US                        -9.80022    8.02606 -1.2211    0.2313
## yearsafter2004                           -0.91218    0.60024 -1.5197    0.1387
## annual_domestic_RD_spending_bil_dollars   2.00726    0.31897  6.2928 5.348e-07
## gdp_per_capita_annual_growthrate_usd      0.21686    0.24590  0.8819    0.3846
##                                            
## (Intercept)                             ***
## as.factor(area)US                          
## yearsafter2004                             
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.2, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.2.rmse <- rmse(actual=actual, predicted=predicted)

confint(mod.od.nw.2)
##                                              2.5 %      97.5 %
## (Intercept)                             -42.737264 -21.7237059
## as.factor(area)US                       -26.169472   6.5690240
## yearsafter2004                           -2.136377   0.3120218
## annual_domestic_RD_spending_bil_dollars   1.356706   2.6578126
## gdp_per_capita_annual_growthrate_usd     -0.284644   0.7183715
mod.od.2.metrics <- get_od_model_metrics(mod.od.2, "Linear")

VIFs of predictors

vif(mod.od.2) 
##                         as.factor(area)                          yearsafter2004 
##                                4.960278                                4.472835 
## annual_domestic_RD_spending_bil_dollars    gdp_per_capita_annual_growthrate_usd 
##                                8.341983                                1.064648
cor(cbind(yearsafter2004=combined_rd_ma_df$yearsafter2004, annual_domestic_RD_spending_bil_dollars=combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars, gdp_per_capita_annual_growthrate_usd=combined_rd_ma_df$gdp_per_capita_annual_growthrate_usd))
##                                         yearsafter2004
## yearsafter2004                               1.0000000
## annual_domestic_RD_spending_bil_dollars      0.6323924
## gdp_per_capita_annual_growthrate_usd        -0.1516009
##                                         annual_domestic_RD_spending_bil_dollars
## yearsafter2004                                                       0.63239241
## annual_domestic_RD_spending_bil_dollars                              1.00000000
## gdp_per_capita_annual_growthrate_usd                                -0.02262092
##                                         gdp_per_capita_annual_growthrate_usd
## yearsafter2004                                                   -0.15160094
## annual_domestic_RD_spending_bil_dollars                          -0.02262092
## gdp_per_capita_annual_growthrate_usd                              1.00000000

Linear Model 3 of Num of Annual Orphan Drug MAs

mod.od.3 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.od.3)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.1119  -4.6375  -0.4147   5.1218  21.9930 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            4.1015     4.7330   0.867    0.393    
## as.factor(area)US                     -4.1771     6.2759  -0.666    0.511    
## yearsafter2004                         0.6910     0.4527   1.526    0.137    
## gdp_per_capita_annual_growthrate_usd   0.2675     0.2808   0.952    0.348    
## as.factor(area)US:yearsafter2004       4.3647     0.6328   6.897 9.85e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.717 on 31 degrees of freedom
## Multiple R-squared:  0.8845, Adjusted R-squared:  0.8696 
## F-statistic: 59.38 on 4 and 31 DF,  p-value: 4.316e-14
plot_lin_pois_od(mod.od.3, "3", "Linear")

#Estimate covariance matrix with Newey West
mod.od.3.vcov <- vcovPL(mod.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.3 <-coeftest(mod.od.3, vcov = mod.od.3.vcov)
mod.od.nw.3
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                           4.10148    1.59477  2.5718 0.0151325 *  
## as.factor(area)US                    -4.17711    8.02490 -0.5205 0.6064004    
## yearsafter2004                        0.69101    0.16355  4.2251 0.0001947 ***
## gdp_per_capita_annual_growthrate_usd  0.26746    0.15363  1.7409 0.0916193 .  
## as.factor(area)US:yearsafter2004      4.36471    0.76468  5.7079 2.824e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.3, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.3.rmse <- rmse(actual=actual, predicted=predicted)

confint(mod.od.nw.3)
##                                             2.5 %     97.5 %
## (Intercept)                            0.84892641  7.3540291
## as.factor(area)US                    -20.54400474 12.1897872
## yearsafter2004                         0.35744757  1.0245755
## gdp_per_capita_annual_growthrate_usd  -0.04587725  0.5808023
## as.factor(area)US:yearsafter2004       2.80512894  5.9242846
mod.od.3.metrics <- get_od_model_metrics(mod.od.3, "Linear")

Linear Model 4 of Num of Annual Orphan Drug MAs

mod.od.4 <- lm(freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd
 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.od.4)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.4436  -3.1015  -0.0441   2.4754  20.6074 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                -2.5320     8.8098
## as.factor(area)US                                         -39.7165    11.1343
## annual_domestic_RD_spending_bil_dollars                     0.5204     0.3345
## gdp_per_capita_annual_growthrate_usd                        0.1157     0.2550
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   1.3312     0.3656
##                                                           t value Pr(>|t|)    
## (Intercept)                                                -0.287  0.77571    
## as.factor(area)US                                          -3.567  0.00120 ** 
## annual_domestic_RD_spending_bil_dollars                     1.556  0.12990    
## gdp_per_capita_annual_growthrate_usd                        0.454  0.65300    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   3.641  0.00098 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.836 on 31 degrees of freedom
## Multiple R-squared:  0.9045, Adjusted R-squared:  0.8922 
## F-statistic: 73.42 on 4 and 31 DF,  p-value: 2.318e-15
plot_lin_pois_od(mod.od.4, "4", "Linear")

#Estimate covariance matrix with Newey West
mod.od.4.vcov <- vcovPL(mod.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.4 <-coeftest(mod.od.4, vcov = mod.od.4.vcov)
mod.od.nw.4
## 
## t test of coefficients:
## 
##                                                            Estimate Std. Error
## (Intercept)                                                -2.53198    3.64040
## as.factor(area)US                                         -39.71653    7.30819
## annual_domestic_RD_spending_bil_dollars                     0.52038    0.13654
## gdp_per_capita_annual_growthrate_usd                        0.11574    0.16107
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   1.33116    0.15781
##                                                           t value  Pr(>|t|)    
## (Intercept)                                               -0.6955 0.4919097    
## as.factor(area)US                                         -5.4345 6.177e-06 ***
## annual_domestic_RD_spending_bil_dollars                    3.8111 0.0006158 ***
## gdp_per_capita_annual_growthrate_usd                       0.7186 0.4777807    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  8.4354 1.578e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.4, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.4.rmse <- rmse(actual=actual, predicted=predicted)

confint(mod.od.nw.4)
##                                                                 2.5 %
## (Intercept)                                                -9.9566323
## as.factor(area)US                                         -54.6216884
## annual_domestic_RD_spending_bil_dollars                     0.2418948
## gdp_per_capita_annual_growthrate_usd                       -0.2127660
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   1.0093119
##                                                                97.5 %
## (Intercept)                                                 4.8926710
## as.factor(area)US                                         -24.8113739
## annual_domestic_RD_spending_bil_dollars                     0.7988630
## gdp_per_capita_annual_growthrate_usd                        0.4442535
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   1.6530078
mod.od.4.metrics <- get_od_model_metrics(mod.od.4, "Linear")

Linear Model 5 of Num of Annual Orphan Drug MAs

mod.od.5 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars , data=combined_rd_ma_df)
summary(mod.od.5)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + 
##     as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.171  -4.071  -1.797   4.741  20.032 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                 0.5217    17.7548
## as.factor(area)US                                         -29.5076    20.3273
## yearsafter2004                                              0.3260     1.4230
## annual_domestic_RD_spending_bil_dollars                     0.2801     1.1560
## gdp_per_capita_annual_growthrate_usd                        0.1704     0.2511
## as.factor(area)US:yearsafter2004                            1.6236     1.7585
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.9226     1.2147
##                                                           t value Pr(>|t|)
## (Intercept)                                                 0.029    0.977
## as.factor(area)US                                          -1.452    0.157
## yearsafter2004                                              0.229    0.820
## annual_domestic_RD_spending_bil_dollars                     0.242    0.810
## gdp_per_capita_annual_growthrate_usd                        0.679    0.503
## as.factor(area)US:yearsafter2004                            0.923    0.363
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.760    0.454
## 
## Residual standard error: 8.627 on 29 degrees of freedom
## Multiple R-squared:  0.9149, Adjusted R-squared:  0.8973 
## F-statistic: 51.94 on 6 and 29 DF,  p-value: 3.319e-14
plot_lin_pois_od(mod.od.5, "5", "Linear")

#Estimate covariance matrix with Newey West
mod.od.5.vcov <- vcovPL(mod.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.5 <- coeftest(mod.od.5, vcov = mod.od.5.vcov) 
mod.od.nw.5
## 
## t test of coefficients:
## 
##                                                            Estimate Std. Error
## (Intercept)                                                 0.52172    6.66568
## as.factor(area)US                                         -29.50760    6.38127
## yearsafter2004                                              0.32598    0.55261
## annual_domestic_RD_spending_bil_dollars                     0.28009    0.43516
## gdp_per_capita_annual_growthrate_usd                        0.17044    0.16030
## as.factor(area)US:yearsafter2004                            1.62358    1.06892
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.92259    0.44845
##                                                           t value  Pr(>|t|)    
## (Intercept)                                                0.0783   0.93815    
## as.factor(area)US                                         -4.6241 7.203e-05 ***
## yearsafter2004                                             0.5899   0.55983    
## annual_domestic_RD_spending_bil_dollars                    0.6436   0.52486    
## gdp_per_capita_annual_growthrate_usd                       1.0633   0.29642    
## as.factor(area)US:yearsafter2004                           1.5189   0.13962    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  2.0573   0.04875 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.5, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.5.rmse <- rmse(actual=actual, predicted=predicted)

confint(mod.od.nw.5)
##                                                                   2.5 %
## (Intercept)                                               -13.111122277
## as.factor(area)US                                         -42.558750363
## yearsafter2004                                             -0.804238887
## annual_domestic_RD_spending_bil_dollars                    -0.609907728
## gdp_per_capita_annual_growthrate_usd                       -0.157398553
## as.factor(area)US:yearsafter2004                           -0.562604191
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.005410144
##                                                                97.5 %
## (Intercept)                                                14.1545525
## as.factor(area)US                                         -16.4564446
## yearsafter2004                                              1.4562082
## annual_domestic_RD_spending_bil_dollars                     1.1700830
## gdp_per_capita_annual_growthrate_usd                        0.4982868
## as.factor(area)US:yearsafter2004                            3.8097712
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   1.8397741
mod.od.5.metrics <- get_od_model_metrics(mod.od.5, "Linear")
mod.od.metrics <- rbind(mod.od.1.metrics, mod.od.2.metrics, mod.od.3.metrics, mod.od.4.metrics, mod.od.5.metrics)
aic_values <- round(mod.od.metrics$aic)
rmse_values <- round(mod.od.metrics$rmse, 2)
fstat_values <- round(mod.od.metrics$fstat, 2)
p_values <- round(mod.od.metrics$pval, 4)
adjrsq_values <- round(mod.od.metrics$adjrsq, 2)


stargazer(mod.od.1, mod.od.2, mod.od.3, mod.od.4, mod.od.5, title="Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" ,"Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorized Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorized Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>32.944<sup>***</sup></td><td>-9.800</td><td>-4.177</td><td>-39.717<sup>***</sup></td><td>-29.508</td></tr>
## <tr><td style="text-align:left"></td><td>(5.137)</td><td>(7.631)</td><td>(6.276)</td><td>(11.134)</td><td>(20.327)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>2.827<sup>***</sup></td><td>-0.912</td><td>0.691</td><td></td><td>0.326</td></tr>
## <tr><td style="text-align:left"></td><td>(0.495)</td><td>(0.698)</td><td>(0.453)</td><td></td><td>(1.423)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>2.007<sup>***</sup></td><td></td><td>0.520</td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.321)</td><td></td><td>(0.334)</td><td>(1.156)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.217</td><td>0.267</td><td>0.116</td><td>0.170</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.299)</td><td>(0.281)</td><td>(0.255)</td><td>(0.251)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>4.365<sup>***</sup></td><td></td><td>1.624</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.633)</td><td></td><td>(1.758)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>1.331<sup>***</sup></td><td>0.923</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.366)</td><td>(1.215)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-13.197<sup>**</sup></td><td>-32.230<sup>***</sup></td><td>4.101</td><td>-2.532</td><td>0.522</td></tr>
## <tr><td style="text-align:left"></td><td>(5.559)</td><td>(4.733)</td><td>(4.733)</td><td>(8.810)</td><td>(17.755)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>304</td><td>277</td><td>272</td><td>266</td><td>266</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>14.76</td><td>9.54</td><td>9.02</td><td>8.2</td><td>7.74</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.67</td><td>0.85</td><td>0.87</td><td>0.89</td><td>0.9</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>36.86</td><td>52.24</td><td>59.38</td><td>73.42</td><td>51.94</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.od.nw.1
m2 <- mod.od.nw.2
m3 <- mod.od.nw.3
m4 <- mod.od.nw.4
m5 <- mod.od.nw.5 

stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodels.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sub>6</sub>)" ,"Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorized Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorized Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>32.944<sup>***</sup></td><td>-9.800</td><td>-4.177</td><td>-39.717<sup>***</sup></td><td>-29.508<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(9.890)</td><td>(8.026)</td><td>(8.025)</td><td>(7.308)</td><td>(6.381)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>2.827<sup>***</sup></td><td>-0.912</td><td>0.691<sup>***</sup></td><td></td><td>0.326</td></tr>
## <tr><td style="text-align:left"></td><td>(0.378)</td><td>(0.600)</td><td>(0.164)</td><td></td><td>(0.553)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>2.007<sup>***</sup></td><td></td><td>0.520<sup>***</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.319)</td><td></td><td>(0.137)</td><td>(0.435)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.217</td><td>0.267<sup>*</sup></td><td>0.116</td><td>0.170</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.246)</td><td>(0.154)</td><td>(0.161)</td><td>(0.160)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>4.365<sup>***</sup></td><td></td><td>1.624</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.765)</td><td></td><td>(1.069)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sub>6</sub>)</td><td></td><td></td><td></td><td>1.331<sup>***</sup></td><td>0.923<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.158)</td><td>(0.448)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-13.197<sup>*</sup></td><td>-32.230<sup>***</sup></td><td>4.101<sup>**</sup></td><td>-2.532</td><td>0.522</td></tr>
## <tr><td style="text-align:left"></td><td>(7.386)</td><td>(5.152)</td><td>(1.595)</td><td>(3.640)</td><td>(6.666)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>304</td><td>277</td><td>272</td><td>266</td><td>266</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>14.76</td><td>9.54</td><td>9.02</td><td>8.2</td><td>7.74</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.67</td><td>0.85</td><td>0.87</td><td>0.89</td><td>0.9</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>36.86</td><td>52.24</td><td>59.38</td><td>73.42</td><td>51.94</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Poisson Models of Number of Orphan Drug Market Authorization Models

Poisson Model 1 of Num of Annual Orphan Drug MAs

mod.pois.od.1 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.1)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.287989   0.104548   12.32   <2e-16 ***
## as.factor(area)US 1.396499   0.079982   17.46   <2e-16 ***
## yearsafter2004    0.110161   0.006749   16.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 759.207  on 35  degrees of freedom
## Residual deviance:  83.399  on 33  degrees of freedom
## AIC: 260.65
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.1, "1", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.1.vcov <- vcovPL(mod.pois.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.1 <- coeftest(mod.pois.od.1, vcov = mod.pois.od.1.vcov)
mod.pois.od.nw.1
## 
## z test of coefficients:
## 
##                   Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)       1.287989   0.202155  6.3713 1.875e-10 ***
## as.factor(area)US 1.396499   0.152009  9.1869 < 2.2e-16 ***
## yearsafter2004    0.110161   0.011615  9.4841 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.1)
##                        2.5 %    97.5 %
## (Intercept)       0.89177186 1.6842066
## as.factor(area)US 1.09856577 1.6944313
## yearsafter2004    0.08739558 0.1329269
mod.pois.od.1.metrics <- get_od_model_metrics(mod.pois.od.1, "Poisson")

Poisson Model 2 of Num of Annual Orphan Drug MAs

mod.pois.od.2 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.2)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                             1.302515   0.102680  12.685  < 2e-16
## as.factor(area)US                       1.090152   0.156723   6.956 3.50e-12
## yearsafter2004                          0.077325   0.015750   4.910 9.13e-07
## annual_domestic_RD_spending_bil_dollars 0.011783   0.005311   2.219   0.0265
## gdp_per_capita_annual_growthrate_usd    0.006178   0.007158   0.863   0.3881
##                                            
## (Intercept)                             ***
## as.factor(area)US                       ***
## yearsafter2004                          ***
## annual_domestic_RD_spending_bil_dollars *  
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 759.207  on 35  degrees of freedom
## Residual deviance:  76.552  on 31  degrees of freedom
## AIC: 257.81
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.2, "2", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.2.vcov <- vcovPL(mod.pois.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.2 <- coeftest(mod.pois.od.2, vcov = mod.pois.od.2.vcov)
mod.pois.od.nw.2
## 
## z test of coefficients:
## 
##                                          Estimate Std. Error z value  Pr(>|z|)
## (Intercept)                             1.3025153  0.1499779  8.6847 < 2.2e-16
## as.factor(area)US                       1.0901521  0.2178398  5.0044 5.604e-07
## yearsafter2004                          0.0773254  0.0195387  3.9576 7.572e-05
## annual_domestic_RD_spending_bil_dollars 0.0117827  0.0053372  2.2077   0.02727
## gdp_per_capita_annual_growthrate_usd    0.0061781  0.0129818  0.4759   0.63414
##                                            
## (Intercept)                             ***
## as.factor(area)US                       ***
## yearsafter2004                          ***
## annual_domestic_RD_spending_bil_dollars *  
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.2)
##                                                2.5 %     97.5 %
## (Intercept)                              1.008564107 1.59646648
## as.factor(area)US                        0.663193984 1.51711031
## yearsafter2004                           0.039030329 0.11562056
## annual_domestic_RD_spending_bil_dollars  0.001321983 0.02224351
## gdp_per_capita_annual_growthrate_usd    -0.019265783 0.03162206
mod.pois.od.2.metrics <- get_od_model_metrics(mod.pois.od.2, "Poisson")

Poisson Model 3 of Num of Annual Orphan Drug MAs

mod.pois.od.3 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.3)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           1.83366    0.16093  11.394  < 2e-16 ***
## as.factor(area)US                     0.66882    0.18556   3.604 0.000313 ***
## yearsafter2004                        0.05711    0.01406   4.061 4.88e-05 ***
## gdp_per_capita_annual_growthrate_usd  0.00669    0.00700   0.956 0.339238    
## as.factor(area)US:yearsafter2004      0.06688    0.01616   4.139 3.48e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 759.207  on 35  degrees of freedom
## Residual deviance:  64.577  on 31  degrees of freedom
## AIC: 245.83
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.3, "3", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.3.vcov <- vcovPL(mod.pois.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.3 <- coeftest(mod.pois.od.3, vcov = mod.pois.od.3.vcov)
mod.pois.od.nw.3
## 
## z test of coefficients:
## 
##                                       Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                          1.8336595  0.1868493  9.8136 < 2.2e-16 ***
## as.factor(area)US                    0.6688196  0.1924589  3.4751 0.0005106 ***
## yearsafter2004                       0.0571056  0.0154020  3.7077 0.0002092 ***
## gdp_per_capita_annual_growthrate_usd 0.0066898  0.0109286  0.6121 0.5404476    
## as.factor(area)US:yearsafter2004     0.0668829  0.0152407  4.3884 1.142e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.3)
##                                            2.5 %     97.5 %
## (Intercept)                           1.46744163 2.19987736
## as.factor(area)US                     0.29160715 1.04603202
## yearsafter2004                        0.02691824 0.08729288
## gdp_per_capita_annual_growthrate_usd -0.01472983 0.02810937
## as.factor(area)US:yearsafter2004      0.03701170 0.09675405
mod.pois.od.3.metrics <- get_od_model_metrics(mod.pois.od.3, "Poisson")

Poisson Model 4 of Num of Annual Orphan Drug MAs

mod.pois.od.4 <- glm(freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.od.4)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                1.2582327  0.3072993
## as.factor(area)US                                          0.7297465  0.3311064
## annual_domestic_RD_spending_bil_dollars                    0.0435081  0.0110544
## gdp_per_capita_annual_growthrate_usd                      -0.0005021  0.0067903
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0079991  0.0113163
##                                                           z value Pr(>|z|)    
## (Intercept)                                                 4.094 4.23e-05 ***
## as.factor(area)US                                           2.204   0.0275 *  
## annual_domestic_RD_spending_bil_dollars                     3.936 8.29e-05 ***
## gdp_per_capita_annual_growthrate_usd                       -0.074   0.9411    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -0.707   0.4797    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 759.21  on 35  degrees of freedom
## Residual deviance: 100.64  on 31  degrees of freedom
## AIC: 281.89
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.4, "4", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.4.vcov <- vcovPL(mod.pois.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.4 <- coeftest(mod.pois.od.4, vcov = mod.pois.od.4.vcov)
mod.pois.od.nw.4
## 
## z test of coefficients:
## 
##                                                              Estimate
## (Intercept)                                                1.25823275
## as.factor(area)US                                          0.72974651
## annual_domestic_RD_spending_bil_dollars                    0.04350814
## gdp_per_capita_annual_growthrate_usd                      -0.00050209
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.00799909
##                                                            Std. Error z value
## (Intercept)                                                0.34888379  3.6065
## as.factor(area)US                                          0.35309408  2.0667
## annual_domestic_RD_spending_bil_dollars                    0.01181452  3.6826
## gdp_per_capita_annual_growthrate_usd                       0.01101955 -0.0456
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  0.01040192 -0.7690
##                                                            Pr(>|z|)    
## (Intercept)                                               0.0003104 ***
## as.factor(area)US                                         0.0387606 *  
## annual_domestic_RD_spending_bil_dollars                   0.0002309 ***
## gdp_per_capita_annual_growthrate_usd                      0.9636583    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.4418925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.4)
##                                                                 2.5 %
## (Intercept)                                                0.57443309
## as.factor(area)US                                          0.03769483
## annual_domestic_RD_spending_bil_dollars                    0.02035211
## gdp_per_capita_annual_growthrate_usd                      -0.02210000
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.02838648
##                                                               97.5 %
## (Intercept)                                               1.94203240
## as.factor(area)US                                         1.42179819
## annual_domestic_RD_spending_bil_dollars                   0.06666417
## gdp_per_capita_annual_growthrate_usd                      0.02109583
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.01238830
mod.pois.od.4.metrics <- get_od_model_metrics(mod.pois.od.4, "Poisson")

Poisson Model 5 of Num of Annual Orphan Drug MAs

mod.pois.od.5 <- glm(freq_orphan_drug_mas ~  as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.od.5)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + 
##     as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                1.587669   0.644130
## as.factor(area)US                                          0.954539   0.663174
## yearsafter2004                                             0.036717   0.054130
## annual_domestic_RD_spending_bil_dollars                    0.016716   0.042740
## gdp_per_capita_annual_growthrate_usd                       0.007273   0.007184
## as.factor(area)US:yearsafter2004                           0.094297   0.058561
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.018953   0.043284
##                                                           z value Pr(>|z|)  
## (Intercept)                                                 2.465   0.0137 *
## as.factor(area)US                                           1.439   0.1501  
## yearsafter2004                                              0.678   0.4976  
## annual_domestic_RD_spending_bil_dollars                     0.391   0.6957  
## gdp_per_capita_annual_growthrate_usd                        1.012   0.3114  
## as.factor(area)US:yearsafter2004                            1.610   0.1073  
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -0.438   0.6615  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 759.207  on 35  degrees of freedom
## Residual deviance:  64.312  on 29  degrees of freedom
## AIC: 249.57
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.5, "5", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.5.vcov <- vcovPL(mod.pois.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.5 <- coeftest(mod.pois.od.5, vcov = mod.pois.od.5.vcov)
mod.pois.od.nw.5
## 
## z test of coefficients:
## 
##                                                             Estimate Std. Error
## (Intercept)                                                1.5876688  0.6905329
## as.factor(area)US                                          0.9545391  0.5669762
## yearsafter2004                                             0.0367170  0.0592109
## annual_domestic_RD_spending_bil_dollars                    0.0167164  0.0454894
## gdp_per_capita_annual_growthrate_usd                       0.0072732  0.0108530
## as.factor(area)US:yearsafter2004                           0.0942970  0.0663960
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0189534  0.0442075
##                                                           z value Pr(>|z|)  
## (Intercept)                                                2.2992  0.02149 *
## as.factor(area)US                                          1.6836  0.09227 .
## yearsafter2004                                             0.6201  0.53519  
## annual_domestic_RD_spending_bil_dollars                    0.3675  0.71326  
## gdp_per_capita_annual_growthrate_usd                       0.6702  0.50276  
## as.factor(area)US:yearsafter2004                           1.4202  0.15554  
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.4287  0.66811  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.5)
##                                                                 2.5 %
## (Intercept)                                                0.23424917
## as.factor(area)US                                         -0.15671387
## yearsafter2004                                            -0.07933417
## annual_domestic_RD_spending_bil_dollars                   -0.07244128
## gdp_per_capita_annual_growthrate_usd                      -0.01399818
## as.factor(area)US:yearsafter2004                          -0.03583679
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.10559845
##                                                               97.5 %
## (Intercept)                                               2.94108845
## as.factor(area)US                                         2.06579208
## yearsafter2004                                            0.15276810
## annual_domestic_RD_spending_bil_dollars                   0.10587408
## gdp_per_capita_annual_growthrate_usd                      0.02854465
## as.factor(area)US:yearsafter2004                          0.22443079
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.06769167
mod.pois.od.5.metrics <- get_od_model_metrics(mod.pois.od.5, "Poisson")
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
mod.od.pois.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
aic_values <- round(mod.od.pois.metrics$aic)
rmse_values <- round(mod.od.pois.metrics$rmse, 2)
logLik_vlues <- round(mod.od.pois.metrics$LogLik, 2)


#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.od.1
m2 <- mod.pois.od.2
m3 <- mod.pois.od.3
m4 <- mod.pois.od.4
m5 <- mod.pois.od.5 


stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)","Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
## 
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>1.396<sup>***</sup></td><td>1.090<sup>***</sup></td><td>0.669<sup>***</sup></td><td>0.730<sup>**</sup></td><td>0.955</td></tr>
## <tr><td style="text-align:left"></td><td>(0.080)</td><td>(0.157)</td><td>(0.186)</td><td>(0.331)</td><td>(0.663)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.110<sup>***</sup></td><td>0.077<sup>***</sup></td><td>0.057<sup>***</sup></td><td></td><td>0.037</td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.016)</td><td>(0.014)</td><td></td><td>(0.054)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.012<sup>**</sup></td><td></td><td>0.044<sup>***</sup></td><td>0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.006</td><td>0.007</td><td>-0.001</td><td>0.007</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.007)</td><td>(0.007)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.067<sup>***</sup></td><td></td><td>0.094</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.016)</td><td></td><td>(0.059)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.008</td><td>-0.019</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.288<sup>***</sup></td><td>1.303<sup>***</sup></td><td>1.834<sup>***</sup></td><td>1.258<sup>***</sup></td><td>1.588<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.105)</td><td>(0.103)</td><td>(0.161)</td><td>(0.307)</td><td>(0.644)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>261</td><td>258</td><td>246</td><td>282</td><td>250</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>8.03</td><td>8.41</td><td>7.57</td><td>10.52</td><td>7.5</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-127.33</td><td>-123.9</td><td>-117.92</td><td>-135.95</td><td>-117.78</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.od.nw.1
m2 <- mod.pois.od.nw.2
m3 <- mod.pois.od.nw.3
m4 <- mod.pois.od.nw.4
m5 <- mod.pois.od.nw.5 

stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodels.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)","Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>1.396<sup>***</sup></td><td>1.090<sup>***</sup></td><td>0.669<sup>***</sup></td><td>0.730<sup>**</sup></td><td>0.955<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.152)</td><td>(0.218)</td><td>(0.192)</td><td>(0.353)</td><td>(0.567)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.110<sup>***</sup></td><td>0.077<sup>***</sup></td><td>0.057<sup>***</sup></td><td></td><td>0.037</td></tr>
## <tr><td style="text-align:left"></td><td>(0.012)</td><td>(0.020)</td><td>(0.015)</td><td></td><td>(0.059)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.012<sup>**</sup></td><td></td><td>0.044<sup>***</sup></td><td>0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.012)</td><td>(0.045)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.006</td><td>0.007</td><td>-0.001</td><td>0.007</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.013)</td><td>(0.011)</td><td>(0.011)</td><td>(0.011)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.067<sup>***</sup></td><td></td><td>0.094</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.015)</td><td></td><td>(0.066)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.008</td><td>-0.019</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.010)</td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.288<sup>***</sup></td><td>1.303<sup>***</sup></td><td>1.834<sup>***</sup></td><td>1.258<sup>***</sup></td><td>1.588<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.202)</td><td>(0.150)</td><td>(0.187)</td><td>(0.349)</td><td>(0.691)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>261</td><td>258</td><td>246</td><td>282</td><td>250</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>8.03</td><td>8.41</td><td>7.57</td><td>10.52</td><td>7.5</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-127.33</td><td>-123.9</td><td>-117.92</td><td>-135.95</td><td>-117.78</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Linear Models of Number of Market Authorizations for NEW Orphan Drugs

Plot Models of Number of Orphan Drug Market Authorizations for NEW Orphan Drugs (Linear or Poisson Models)

plot_lin_pois_new_od <- function(model, model_number, model_type){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  combined_rd_ma_df$residuals <- resid(model)
  
  par(mfrow =c(2,4))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  mtext(glue('{model_type} Model {model_number} of Annual MAs for New Orphan Drugs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  predicted <- predict(model , type = "response")
  
  ggplot(combined_rd_ma_df, aes(x = year, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = str_wrap("Number of Annual Market Authorizations for New Orphan Drugs", width = 40),
         color = "Area",
         alpha = "Line Type",
         title=str_wrap(glue("{model_type} Model {model_number} of Annual Market Authorizations for New Orphan Drugs"), width = 60))+
    scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1))+
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  #Teun van den Brand
  #2024/02/26

}

Get Metrics of Models of Number of Market Authorizations for NEW Orphan Drugs (Linear or Poisson Models)

get_new_od_model_metrics <- function(model, model_type){
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$freq_new_orphan_drug_mas
  model_rmse <- rmse(actual=actual, predicted=predicted)
  
  model_df1 <- NA
  model_df2 <- NA
  model_adjRsq <- NA
  model_fstat <- NA
  model_pval <- NA
  model_loglik <- NA
  
  if(model_type == "Linear"){
    
    model_df1 <- summary(model)$fstatistic[2]
    model_df2 <- summary(model)$fstatistic[3]
    model_fstat <- summary(model)$fstatistic[1]
    model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE) 
    # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
    # Renesh Bedre
    # March 26, 2023
  
    model_adjRsq <- summary(model)$adj.r.squared
  }
  
  if(model_type == "Poisson"){
    model_loglik <- logLik(model)
  }
  
  model_aic <- AIC(model)
  
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
  rownames(metrics_df) <- NULL


  return(metrics_df)
}

Linear Model 1 of Annual MAs for NEW orphan drugs

mod.new.od.1 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.1)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4943  -3.5090  -0.7898   3.6053  13.5920 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.8772     2.3258  -0.377    0.708    
## as.factor(area)US  15.5556     2.1492   7.238 2.66e-08 ***
## yearsafter2004      1.3581     0.2071   6.557 1.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.448 on 33 degrees of freedom
## Multiple R-squared:  0.7429, Adjusted R-squared:  0.7274 
## F-statistic: 47.69 on 2 and 33 DF,  p-value: 1.842e-10
plot_lin_pois_new_od(mod.new.od.1, "1", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.1.vcov <- vcovPL(mod.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.1 <-coeftest(mod.new.od.1, vcov = mod.new.od.1.vcov)
mod.new.od.nw.1
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       -0.87719    2.55934 -0.3427     0.734    
## as.factor(area)US 15.55556    3.22850  4.8182 3.149e-05 ***
## yearsafter2004     1.35810    0.13298 10.2128 9.530e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.1.metrics <- get_new_od_model_metrics(mod.new.od.1, "Linear")

Linear Model 2 of Annual MAs for NEW orphan drugs

mod.new.od.2 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.2)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1785  -3.2104  -0.2759   3.0949  10.9813 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                             -6.35122    2.61475  -2.429  0.02113 * 
## as.factor(area)US                        3.53369    4.21571   0.838  0.40832   
## yearsafter2004                           0.31111    0.38581   0.806  0.42616   
## annual_domestic_RD_spending_bil_dollars  0.56444    0.17708   3.187  0.00327 **
## gdp_per_capita_annual_growthrate_usd     0.08705    0.16511   0.527  0.60181   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.679 on 31 degrees of freedom
## Multiple R-squared:  0.8127, Adjusted R-squared:  0.7885 
## F-statistic: 33.63 on 4 and 31 DF,  p-value: 7.211e-11
plot_lin_pois_new_od(mod.new.od.2, "2", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.2.vcov <- vcovPL(mod.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.2 <-coeftest(mod.new.od.2, vcov = mod.new.od.2.vcov)
mod.new.od.nw.2
## 
## t test of coefficients:
## 
##                                          Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                             -6.351220   1.759759 -3.6091 0.0010681
## as.factor(area)US                        3.533692   4.316055  0.8187 0.4191890
## yearsafter2004                           0.311113   0.343208  0.9065 0.3716727
## annual_domestic_RD_spending_bil_dollars  0.564440   0.152241  3.7075 0.0008175
## gdp_per_capita_annual_growthrate_usd     0.087047   0.161052  0.5405 0.5927195
##                                            
## (Intercept)                             ** 
## as.factor(area)US                          
## yearsafter2004                             
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.2.metrics <- get_new_od_model_metrics(mod.new.od.2, "Linear")

confint(mod.new.od.nw.2)
##                                              2.5 %     97.5 %
## (Intercept)                             -9.9402716 -2.7621681
## as.factor(area)US                       -5.2689605 12.3363440
## yearsafter2004                          -0.3888657  1.0110909
## annual_domestic_RD_spending_bil_dollars  0.2539426  0.8749366
## gdp_per_capita_annual_growthrate_usd    -0.2414197  0.4155144

Linear Model 3 of Annual MAs for NEW orphan drugs

mod.new.od.3 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.new.od.3)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4278  -2.9585   0.0282   3.2874   8.5813 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           4.83138    2.53528   1.906 0.066005 .  
## as.factor(area)US                     3.32700    3.36173   0.990 0.329998    
## yearsafter2004                        0.65404    0.24250   2.697 0.011208 *  
## gdp_per_capita_annual_growthrate_usd  0.08599    0.15042   0.572 0.571687    
## as.factor(area)US:yearsafter2004      1.43784    0.33899   4.242 0.000186 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.205 on 31 degrees of freedom
## Multiple R-squared:  0.8426, Adjusted R-squared:  0.8223 
## F-statistic:  41.5 on 4 and 31 DF,  p-value: 5.012e-12
plot_lin_pois_new_od(mod.new.od.3, "3", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.3.vcov <- vcovPL(mod.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.3 <-coeftest(mod.new.od.3, vcov = mod.new.od.3.vcov)
mod.new.od.nw.3
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          4.831381   1.439561  3.3561  0.002102 ** 
## as.factor(area)US                    3.327002   2.407061  1.3822  0.176794    
## yearsafter2004                       0.654042   0.144359  4.5307 8.205e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.085988   0.128458  0.6694  0.508206    
## as.factor(area)US:yearsafter2004     1.437838   0.247706  5.8046 2.142e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.3.metrics <- get_new_od_model_metrics(mod.new.od.3, "Linear")

confint(mod.new.od.nw.3)
##                                           2.5 %    97.5 %
## (Intercept)                           1.8953778 7.7673842
## as.factor(area)US                    -1.5822306 8.2362343
## yearsafter2004                        0.3596208 0.9484642
## gdp_per_capita_annual_growthrate_usd -0.1760037 0.3479788
## as.factor(area)US:yearsafter2004      0.9326396 1.9430373

Linear Model 4 of Annual MAs for NEW orphan drugs

mod.new.od.4 <- lm(freq_new_orphan_drug_mas ~  as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd +  as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.new.od.4)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6424  -3.6096  -0.1338   4.1221  11.4456 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                               -2.30838    5.65061
## as.factor(area)US                                         -4.97703    7.14154
## annual_domestic_RD_spending_bil_dollars                    0.51651    0.21453
## gdp_per_capita_annual_growthrate_usd                       0.02422    0.16353
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  0.20616    0.23451
##                                                           t value Pr(>|t|)  
## (Intercept)                                                -0.409   0.6857  
## as.factor(area)US                                          -0.697   0.4911  
## annual_domestic_RD_spending_bil_dollars                     2.408   0.0222 *
## gdp_per_capita_annual_growthrate_usd                        0.148   0.8832  
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.879   0.3861  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.668 on 31 degrees of freedom
## Multiple R-squared:  0.8134, Adjusted R-squared:  0.7893 
## F-statistic: 33.79 on 4 and 31 DF,  p-value: 6.797e-11
plot_lin_pois_new_od(mod.new.od.4, "4", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.4.vcov <- vcovPL(mod.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.4 <-coeftest(mod.new.od.4, vcov = mod.new.od.4.vcov)
mod.new.od.nw.4
## 
## t test of coefficients:
## 
##                                                            Estimate Std. Error
## (Intercept)                                               -2.308381   3.267658
## as.factor(area)US                                         -4.977025   4.041802
## annual_domestic_RD_spending_bil_dollars                    0.516511   0.123020
## gdp_per_capita_annual_growthrate_usd                       0.024225   0.133323
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  0.206155   0.100468
##                                                           t value  Pr(>|t|)    
## (Intercept)                                               -0.7064 0.4851951    
## as.factor(area)US                                         -1.2314 0.2274382    
## annual_domestic_RD_spending_bil_dollars                    4.1986 0.0002097 ***
## gdp_per_capita_annual_growthrate_usd                       0.1817 0.8570003    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  2.0520 0.0487012 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.4.metrics <- get_new_od_model_metrics(mod.new.od.4, "Linear")

confint(mod.new.od.nw.4)
##                                                                   2.5 %
## (Intercept)                                                -8.972813840
## as.factor(area)US                                         -13.220335718
## annual_domestic_RD_spending_bil_dollars                     0.265609730
## gdp_per_capita_annual_growthrate_usd                       -0.247689347
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars   0.001250445
##                                                              97.5 %
## (Intercept)                                               4.3560519
## as.factor(area)US                                         3.2662853
## annual_domestic_RD_spending_bil_dollars                   0.7674114
## gdp_per_capita_annual_growthrate_usd                      0.2961392
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.4110603

Linear Model 5 of Annual MAs for NEW orphan drugs

mod.new.od.5 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.new.od.5)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + 
##     as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9080  -3.0238   0.1602   3.5052   8.4363 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                                0.75206   10.92260
## as.factor(area)US                                          2.88653   12.50519
## yearsafter2004                                             0.31846    0.87540
## annual_domestic_RD_spending_bil_dollars                    0.27966    0.71115
## gdp_per_capita_annual_growthrate_usd                       0.06997    0.15450
## as.factor(area)US:yearsafter2004                           1.28754    1.08180
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.09153    0.74724
##                                                           t value Pr(>|t|)
## (Intercept)                                                 0.069    0.946
## as.factor(area)US                                           0.231    0.819
## yearsafter2004                                              0.364    0.719
## annual_domestic_RD_spending_bil_dollars                     0.393    0.697
## gdp_per_capita_annual_growthrate_usd                        0.453    0.654
## as.factor(area)US:yearsafter2004                            1.190    0.244
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -0.122    0.903
## 
## Residual standard error: 5.307 on 29 degrees of freedom
## Multiple R-squared:  0.847,  Adjusted R-squared:  0.8153 
## F-statistic: 26.75 on 6 and 29 DF,  p-value: 1.421e-10
plot_lin_pois_new_od(mod.new.od.5, "5", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.5.vcov <- vcovPL(mod.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.5 <-coeftest(mod.new.od.5, vcov = mod.new.od.5.vcov)
mod.new.od.nw.5
## 
## t test of coefficients:
## 
##                                                            Estimate Std. Error
## (Intercept)                                                0.752058   7.161922
## as.factor(area)US                                          2.886533   6.422452
## yearsafter2004                                             0.318456   0.554784
## annual_domestic_RD_spending_bil_dollars                    0.279663   0.451301
## gdp_per_capita_annual_growthrate_usd                       0.069973   0.135317
## as.factor(area)US:yearsafter2004                           1.287536   0.720547
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.091526   0.448853
##                                                           t value Pr(>|t|)  
## (Intercept)                                                0.1050  0.91709  
## as.factor(area)US                                          0.4494  0.65645  
## yearsafter2004                                             0.5740  0.57038  
## annual_domestic_RD_spending_bil_dollars                    0.6197  0.54031  
## gdp_per_capita_annual_growthrate_usd                       0.5171  0.60901  
## as.factor(area)US:yearsafter2004                           1.7869  0.08441 .
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.2039  0.83985  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.5.metrics <- get_new_od_model_metrics(mod.new.od.5, "Linear")

confint(mod.new.od.nw.5)
##                                                                 2.5 %
## (Intercept)                                               -13.8957165
## as.factor(area)US                                         -10.2488567
## yearsafter2004                                             -0.8162052
## annual_domestic_RD_spending_bil_dollars                    -0.6433509
## gdp_per_capita_annual_growthrate_usd                       -0.2067814
## as.factor(area)US:yearsafter2004                           -0.1861481
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -1.0095335
##                                                               97.5 %
## (Intercept)                                               15.3998321
## as.factor(area)US                                         16.0219220
## yearsafter2004                                             1.4531173
## annual_domestic_RD_spending_bil_dollars                    1.2026759
## gdp_per_capita_annual_growthrate_usd                       0.3467273
## as.factor(area)US:yearsafter2004                           2.7612196
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  0.8264817
mod.new.od.metrics <- rbind(mod.new.od.1.metrics, mod.new.od.2.metrics, mod.new.od.3.metrics, mod.new.od.4.metrics, mod.new.od.5.metrics)
aic_values <- round(mod.new.od.metrics$aic)
rmse_values <- round(mod.new.od.metrics$rmse, 2)
fstat_values <- round(mod.new.od.metrics$fstat, 2)
p_values <- round(mod.new.od.metrics$pval, 4)
adjrsq_values <- round(mod.new.od.metrics$adjrsq, 2)

#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.new.od.1
m2 <- mod.new.od.2
m3 <- mod.new.od.3
m4 <- mod.new.od.4
m5 <- mod.new.od.5 


stargazer(m1, m2, m3, m4, m5, title="Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>15.556<sup>***</sup></td><td>3.534</td><td>3.327</td><td>-4.977</td><td>2.887</td></tr>
## <tr><td style="text-align:left"></td><td>(2.149)</td><td>(4.216)</td><td>(3.362)</td><td>(7.142)</td><td>(12.505)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>1.358<sup>***</sup></td><td>0.311</td><td>0.654<sup>**</sup></td><td></td><td>0.318</td></tr>
## <tr><td style="text-align:left"></td><td>(0.207)</td><td>(0.386)</td><td>(0.242)</td><td></td><td>(0.875)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.564<sup>***</sup></td><td></td><td>0.517<sup>**</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.177)</td><td></td><td>(0.215)</td><td>(0.711)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.087</td><td>0.086</td><td>0.024</td><td>0.070</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.165)</td><td>(0.150)</td><td>(0.164)</td><td>(0.155)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>1.438<sup>***</sup></td><td></td><td>1.288</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.339)</td><td></td><td>(1.082)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>0.206</td><td>-0.092</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.235)</td><td>(0.747)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-0.877</td><td>-6.351<sup>**</sup></td><td>4.831<sup>*</sup></td><td>-2.308</td><td>0.752</td></tr>
## <tr><td style="text-align:left"></td><td>(2.326)</td><td>(2.615)</td><td>(2.535)</td><td>(5.651)</td><td>(10.923)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>241</td><td>234</td><td>228</td><td>234</td><td>231</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>6.17</td><td>5.27</td><td>4.83</td><td>5.26</td><td>4.76</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.73</td><td>0.79</td><td>0.82</td><td>0.79</td><td>0.82</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>47.69</td><td>33.63</td><td>41.5</td><td>33.79</td><td>26.75</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.new.od.nw.1
m2 <- mod.new.od.nw.2
m3 <- mod.new.od.nw.3
m4 <- mod.new.od.nw.4
m5 <- mod.new.od.nw.5 


stargazer(m1, m2, m3, m4, m5, title="Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodels.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>15.556<sup>***</sup></td><td>3.534</td><td>3.327</td><td>-4.977</td><td>2.887</td></tr>
## <tr><td style="text-align:left"></td><td>(3.229)</td><td>(4.316)</td><td>(2.407)</td><td>(4.042)</td><td>(6.422)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>1.358<sup>***</sup></td><td>0.311</td><td>0.654<sup>***</sup></td><td></td><td>0.318</td></tr>
## <tr><td style="text-align:left"></td><td>(0.133)</td><td>(0.343)</td><td>(0.144)</td><td></td><td>(0.555)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.564<sup>***</sup></td><td></td><td>0.517<sup>***</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.152)</td><td></td><td>(0.123)</td><td>(0.451)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.087</td><td>0.086</td><td>0.024</td><td>0.070</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.161)</td><td>(0.128)</td><td>(0.133)</td><td>(0.135)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>1.438<sup>***</sup></td><td></td><td>1.288<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.248)</td><td></td><td>(0.721)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>0.206<sup>**</sup></td><td>-0.092</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.100)</td><td>(0.449)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-0.877</td><td>-6.351<sup>***</sup></td><td>4.831<sup>***</sup></td><td>-2.308</td><td>0.752</td></tr>
## <tr><td style="text-align:left"></td><td>(2.559)</td><td>(1.760)</td><td>(1.440)</td><td>(3.268)</td><td>(7.162)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>241</td><td>234</td><td>228</td><td>234</td><td>231</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>6.17</td><td>5.27</td><td>4.83</td><td>5.26</td><td>4.76</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.73</td><td>0.79</td><td>0.82</td><td>0.79</td><td>0.82</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>47.69</td><td>33.63</td><td>41.5</td><td>33.79</td><td>26.75</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Overall mean of Domestic R&D spending

mean(combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars)
## [1] 35.61325

Overall mean of US Domestic R&D spending

mean(subset(combined_rd_ma_df ,area=="US")$annual_domestic_RD_spending_bil_dollars)
## [1] 46.25642

Overall mean of annual EU Domestic R&D spending

mean(subset(combined_rd_ma_df ,area=="EU")$annual_domestic_RD_spending_bil_dollars)
## [1] 24.97008

Overall mean of annual number of US Market Authroized Orphan Drugs

mean(subset(combined_rd_ma_df ,area=="US")$freq_orphan_drug_mas)
## [1] 43.77778
mean(subset(combined_rd_ma_df ,area=="US")$freq_new_orphan_drug_mas)
## [1] 26.22222

Overall mean of annual number of EU Market Authroized Orphan Drugs

mean(subset(combined_rd_ma_df ,area=="EU")$freq_orphan_drug_mas)
## [1] 10.83333
mean(subset(combined_rd_ma_df ,area=="EU")$freq_new_orphan_drug_mas)
## [1] 10.66667

Poisson Models for Number of Market Authorizations for NEW Orphan Drugs

Poisson model 1 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.1 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004  , data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.1)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.645620   0.109281  15.059   <2e-16 ***
## as.factor(area)US 0.899484   0.085598  10.508   <2e-16 ***
## yearsafter2004    0.075900   0.007829   9.695   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 267.814  on 35  degrees of freedom
## Residual deviance:  47.599  on 33  degrees of freedom
## AIC: 217.38
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.1, "1", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.1.vcov <- vcovPL(mod.pois.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.1 <- coeftest(mod.pois.new.od.1, vcov = mod.pois.new.od.1.vcov)
mod.pois.new.od.nw.1
## 
## z test of coefficients:
## 
##                    Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)       1.6456196  0.1219637  13.493 < 2.2e-16 ***
## as.factor(area)US 0.8994836  0.0851551  10.563 < 2.2e-16 ***
## yearsafter2004    0.0758998  0.0068369  11.101 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.1.metrics <- get_new_od_model_metrics(mod.pois.new.od.1, "Poisson")

confint(mod.pois.new.od.nw.1)
##                        2.5 %    97.5 %
## (Intercept)       1.40657514 1.8846640
## as.factor(area)US 0.73258262 1.0663846
## yearsafter2004    0.06249974 0.0892999

Poisson model 2 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.2 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.2)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                             1.635504   0.110626  14.784  < 2e-16
## as.factor(area)US                       0.846810   0.177685   4.766 1.88e-06
## yearsafter2004                          0.070676   0.017502   4.038 5.39e-05
## annual_domestic_RD_spending_bil_dollars 0.002101   0.006431   0.327    0.744
## gdp_per_capita_annual_growthrate_usd    0.002571   0.008073   0.319    0.750
##                                            
## (Intercept)                             ***
## as.factor(area)US                       ***
## yearsafter2004                          ***
## annual_domestic_RD_spending_bil_dollars    
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 267.81  on 35  degrees of freedom
## Residual deviance:  47.34  on 31  degrees of freedom
## AIC: 221.12
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.2, "2", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.2.vcov <- vcovPL(mod.pois.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.2 <- coeftest(mod.pois.new.od.2, vcov = mod.pois.new.od.2.vcov)
mod.pois.new.od.nw.2
## 
## z test of coefficients:
## 
##                                          Estimate Std. Error z value  Pr(>|z|)
## (Intercept)                             1.6355035  0.1076405 15.1941 < 2.2e-16
## as.factor(area)US                       0.8468101  0.1606519  5.2711 1.356e-07
## yearsafter2004                          0.0706765  0.0162214  4.3570 1.319e-05
## annual_domestic_RD_spending_bil_dollars 0.0021014  0.0050413  0.4168    0.6768
## gdp_per_capita_annual_growthrate_usd    0.0025715  0.0115537  0.2226    0.8239
##                                            
## (Intercept)                             ***
## as.factor(area)US                       ***
## yearsafter2004                          ***
## annual_domestic_RD_spending_bil_dollars    
## gdp_per_capita_annual_growthrate_usd       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.2.metrics <- get_new_od_model_metrics(mod.pois.new.od.2, "Poisson")

confint(mod.pois.new.od.nw.2)
##                                                2.5 %     97.5 %
## (Intercept)                              1.424532098 1.84647494
## as.factor(area)US                        0.531938159 1.16168213
## yearsafter2004                           0.038883219 0.10246977
## annual_domestic_RD_spending_bil_dollars -0.007779401 0.01198222
## gdp_per_capita_annual_growthrate_usd    -0.020073322 0.02521630

Poisson model 3 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.3 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.3)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          1.804979   0.164882  10.947  < 2e-16 ***
## as.factor(area)US                    0.663508   0.195491   3.394 0.000689 ***
## yearsafter2004                       0.059871   0.014328   4.179 2.93e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.001917   0.007922   0.242 0.808790    
## as.factor(area)US:yearsafter2004     0.022662   0.017193   1.318 0.187464    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 267.814  on 35  degrees of freedom
## Residual deviance:  45.714  on 31  degrees of freedom
## AIC: 219.49
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.3, "3", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.3.vcov <- vcovPL(mod.pois.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.3 <- coeftest(mod.pois.new.od.3, vcov = mod.pois.new.od.3.vcov)
mod.pois.new.od.nw.3
## 
## z test of coefficients:
## 
##                                      Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                          1.804979   0.182915  9.8678 < 2.2e-16 ***
## as.factor(area)US                    0.663508   0.178617  3.7147 0.0002035 ***
## yearsafter2004                       0.059871   0.015031  3.9831 6.801e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.001917   0.010548  0.1817 0.8557823    
## as.factor(area)US:yearsafter2004     0.022662   0.013997  1.6191 0.1054195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.3.metrics <- get_new_od_model_metrics(mod.pois.new.od.3, "Poisson")

confint(mod.pois.new.od.nw.3)
##                                             2.5 %     97.5 %
## (Intercept)                           1.446471475 2.16348686
## as.factor(area)US                     0.313425021 1.01359188
## yearsafter2004                        0.030410385 0.08933097
## gdp_per_capita_annual_growthrate_usd -0.018756022 0.02259000
## as.factor(area)US:yearsafter2004     -0.004770509 0.05009488

Poisson model 4 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.4 <- glm(freq_new_orphan_drug_mas ~  as.factor(area)  + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.new.od.4)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + 
##     gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                1.180839   0.312220
## as.factor(area)US                                          0.905994   0.347318
## annual_domestic_RD_spending_bil_dollars                    0.045914   0.011172
## gdp_per_capita_annual_growthrate_usd                      -0.001498   0.007761
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.021750   0.011578
##                                                           z value Pr(>|z|)    
## (Intercept)                                                 3.782 0.000156 ***
## as.factor(area)US                                           2.609 0.009093 ** 
## annual_domestic_RD_spending_bil_dollars                     4.110 3.96e-05 ***
## gdp_per_capita_annual_growthrate_usd                       -0.193 0.846973    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -1.879 0.060301 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 267.814  on 35  degrees of freedom
## Residual deviance:  60.354  on 31  degrees of freedom
## AIC: 234.13
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.4, "4", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.4.vcov <- vcovPL(mod.pois.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.4 <- coeftest(mod.pois.new.od.4, vcov = mod.pois.new.od.4.vcov)
mod.pois.new.od.nw.4
## 
## z test of coefficients:
## 
##                                                             Estimate Std. Error
## (Intercept)                                                1.1808391  0.3401473
## as.factor(area)US                                          0.9059936  0.3466624
## annual_domestic_RD_spending_bil_dollars                    0.0459142  0.0115817
## gdp_per_capita_annual_growthrate_usd                      -0.0014978  0.0104463
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0217502  0.0105705
##                                                           z value  Pr(>|z|)    
## (Intercept)                                                3.4716 0.0005175 ***
## as.factor(area)US                                          2.6135 0.0089627 ** 
## annual_domestic_RD_spending_bil_dollars                    3.9644 7.359e-05 ***
## gdp_per_capita_annual_growthrate_usd                      -0.1434 0.8859913    
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -2.0576 0.0396262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.4.metrics <- get_new_od_model_metrics(mod.pois.new.od.4, "Poisson")

confint(mod.pois.new.od.nw.4)
##                                                                 2.5 %
## (Intercept)                                                0.51416259
## as.factor(area)US                                          0.22654767
## annual_domestic_RD_spending_bil_dollars                    0.02321443
## gdp_per_capita_annual_growthrate_usd                      -0.02197220
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.04246807
##                                                                 97.5 %
## (Intercept)                                                1.847515641
## as.factor(area)US                                          1.585439503
## annual_domestic_RD_spending_bil_dollars                    0.068613875
## gdp_per_capita_annual_growthrate_usd                       0.018976647
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.001032325

Poisson model 5 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.5 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd
 + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.new.od.5)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + 
##     as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                                            Estimate Std. Error
## (Intercept)                                                1.594788   0.649095
## as.factor(area)US                                          0.991965   0.680585
## yearsafter2004                                             0.042603   0.054937
## annual_domestic_RD_spending_bil_dollars                    0.014167   0.043172
## gdp_per_capita_annual_growthrate_usd                       0.003007   0.008077
## as.factor(area)US:yearsafter2004                           0.057304   0.060991
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.020057   0.044019
##                                                           z value Pr(>|z|)  
## (Intercept)                                                 2.457    0.014 *
## as.factor(area)US                                           1.458    0.145  
## yearsafter2004                                              0.775    0.438  
## annual_domestic_RD_spending_bil_dollars                     0.328    0.743  
## gdp_per_capita_annual_growthrate_usd                        0.372    0.710  
## as.factor(area)US:yearsafter2004                            0.940    0.347  
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars  -0.456    0.649  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 267.814  on 35  degrees of freedom
## Residual deviance:  45.111  on 29  degrees of freedom
## AIC: 222.89
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.5, "5", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.5.vcov <- vcovPL(mod.pois.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.5 <- coeftest(mod.pois.new.od.5, vcov = mod.pois.new.od.5.vcov)
mod.pois.new.od.nw.5
## 
## z test of coefficients:
## 
##                                                             Estimate Std. Error
## (Intercept)                                                1.5947882  0.7374296
## as.factor(area)US                                          0.9919651  0.6904769
## yearsafter2004                                             0.0426032  0.0598991
## annual_domestic_RD_spending_bil_dollars                    0.0141670  0.0472703
## gdp_per_capita_annual_growthrate_usd                       0.0030073  0.0108619
## as.factor(area)US:yearsafter2004                           0.0573035  0.0657810
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0200566  0.0476424
##                                                           z value Pr(>|z|)  
## (Intercept)                                                2.1626  0.03057 *
## as.factor(area)US                                          1.4366  0.15082  
## yearsafter2004                                             0.7112  0.47693  
## annual_domestic_RD_spending_bil_dollars                    0.2997  0.76440  
## gdp_per_capita_annual_growthrate_usd                       0.2769  0.78188  
## as.factor(area)US:yearsafter2004                           0.8711  0.38369  
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.4210  0.67377  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.5.metrics <- get_new_od_model_metrics(mod.pois.new.od.5, "Poisson")

confint(mod.pois.new.od.nw.5)
##                                                                 2.5 %
## (Intercept)                                                0.14945274
## as.factor(area)US                                         -0.36134473
## yearsafter2004                                            -0.07479700
## annual_domestic_RD_spending_bil_dollars                   -0.07848104
## gdp_per_capita_annual_growthrate_usd                      -0.01828168
## as.factor(area)US:yearsafter2004                          -0.07162484
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.11343401
##                                                               97.5 %
## (Intercept)                                               3.04012362
## as.factor(area)US                                         2.34527483
## yearsafter2004                                            0.16000333
## annual_domestic_RD_spending_bil_dollars                   0.10681501
## gdp_per_capita_annual_growthrate_usd                      0.02429626
## as.factor(area)US:yearsafter2004                          0.18623189
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.07332071
mod.pois.new.od.metrics <- rbind(mod.pois.new.od.1.metrics, mod.pois.new.od.2.metrics, mod.pois.new.od.3.metrics, mod.pois.new.od.4.metrics, mod.pois.new.od.5.metrics)
aic_values <- round(mod.pois.new.od.metrics$aic)
rmse_values <- round(mod.pois.new.od.metrics$rmse, 2)
loglik_values <- round(mod.pois.new.od.metrics$LogLik, 2)

#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.new.od.1
m2 <- mod.pois.new.od.2
m3 <- mod.pois.new.od.3
m4 <- mod.pois.new.od.4
m5 <- mod.pois.new.od.5 

stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)" , "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
## 
## <table style="text-align:center"><caption><strong>Poisson Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>0.899<sup>***</sup></td><td>0.847<sup>***</sup></td><td>0.664<sup>***</sup></td><td>0.906<sup>***</sup></td><td>0.992</td></tr>
## <tr><td style="text-align:left"></td><td>(0.086)</td><td>(0.178)</td><td>(0.195)</td><td>(0.347)</td><td>(0.681)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.076<sup>***</sup></td><td>0.071<sup>***</sup></td><td>0.060<sup>***</sup></td><td></td><td>0.043</td></tr>
## <tr><td style="text-align:left"></td><td>(0.008)</td><td>(0.018)</td><td>(0.014)</td><td></td><td>(0.055)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.002</td><td></td><td>0.046<sup>***</sup></td><td>0.014</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.006)</td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.003</td><td>0.002</td><td>-0.001</td><td>0.003</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.008)</td><td>(0.008)</td><td>(0.008)</td><td>(0.008)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.023</td><td></td><td>0.057</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.017)</td><td></td><td>(0.061)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.022<sup>*</sup></td><td>-0.020</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.012)</td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.646<sup>***</sup></td><td>1.636<sup>***</sup></td><td>1.805<sup>***</sup></td><td>1.181<sup>***</sup></td><td>1.595<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.109)</td><td>(0.111)</td><td>(0.165)</td><td>(0.312)</td><td>(0.649)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>217</td><td>221</td><td>219</td><td>234</td><td>223</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>4.77</td><td>4.82</td><td>4.72</td><td>5.68</td><td>4.67</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-105.69</td><td>-105.56</td><td>-104.75</td><td>-112.07</td><td>-104.44</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.new.od.nw.1
m2 <- mod.pois.new.od.nw.2
m3 <- mod.pois.new.od.nw.3
m4 <- mod.pois.new.od.nw.4
m5 <- mod.pois.new.od.nw.5 


stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Market Authorizations for New Orphan Drugs ", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodels.htm",
          covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)" , "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>0.899<sup>***</sup></td><td>0.847<sup>***</sup></td><td>0.664<sup>***</sup></td><td>0.906<sup>***</sup></td><td>0.992</td></tr>
## <tr><td style="text-align:left"></td><td>(0.085)</td><td>(0.161)</td><td>(0.179)</td><td>(0.347)</td><td>(0.690)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.076<sup>***</sup></td><td>0.071<sup>***</sup></td><td>0.060<sup>***</sup></td><td></td><td>0.043</td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.016)</td><td>(0.015)</td><td></td><td>(0.060)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.002</td><td></td><td>0.046<sup>***</sup></td><td>0.014</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.012)</td><td>(0.047)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.003</td><td>0.002</td><td>-0.001</td><td>0.003</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.012)</td><td>(0.011)</td><td>(0.010)</td><td>(0.011)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.023</td><td></td><td>0.057</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.014)</td><td></td><td>(0.066)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.022<sup>**</sup></td><td>-0.020</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.011)</td><td>(0.048)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.646<sup>***</sup></td><td>1.636<sup>***</sup></td><td>1.805<sup>***</sup></td><td>1.181<sup>***</sup></td><td>1.595<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.122)</td><td>(0.108)</td><td>(0.183)</td><td>(0.340)</td><td>(0.737)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>217</td><td>221</td><td>219</td><td>234</td><td>223</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>4.77</td><td>4.82</td><td>4.72</td><td>5.68</td><td>4.67</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-105.69</td><td>-105.56</td><td>-104.75</td><td>-112.07</td><td>-104.44</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>